Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring & fixes #930

Merged
merged 9 commits into from
Dec 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,10 @@ repositories {
}
}

val milibVersion = "2.2.0-24-master"
val milibVersion = "2.2.0-25-master"
val repseqioVersion = "1.6.0-4-master"
val miplotsVersion = "1.2.0"
val mitoolVersion = "1.5.0-20-refactoring"
val mitoolVersion = "1.5.0-26-main"
val jacksonBomVersion = "2.14.1"
val redberryPipeVersion = "1.3.0-9-master"

Expand Down
5 changes: 5 additions & 0 deletions changelogs/v4.1.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ experimental setup and study goals.
- add tags info in `exportAlignmentsPretty` and `exportClonesPretty`
- add `--chains` filter for `exportShmTrees`, `exportShmTreesWithNodes`, `exportShmTreesNewick`
and `exportPlots shmTrees` commands
- fixed old bug #353, now all aligners favor leftmost J gene in situations where multiple genes can ve found in the
sequence (i.e. mis-spliced mRNA)
- new filter and parameter added in `assemblePartial`; parameter name is `minimalNOverlapShare`, it controls minimal
relative part of N region that must be covered by the overlap to conclude that two reads are from the same V(D)J
rearrangement
- fixed NPE in `assemblePartial` executed for the data without C-gene alignment settings
- by default exports show messages like 'region_not_covered' for data that can't be extracted (requesting `-nFeature`
for not covered region or not existed tag). Option `--not-covered-as-empty` will save previous behaviour
Expand Down
1 change: 0 additions & 1 deletion itests/case003.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,3 @@ mixcr analyze generic-tcr-amplicon \
test_R1.fastq test_R2.fastq case3

[[ -f case3.clna ]] || exit 1

6 changes: 3 additions & 3 deletions itests/case008.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ mixcr analyze generic-tcr-amplicon \
--split-clones-by V --split-clones-by J --split-clones-by C \
CD4M1_test_R1.fastq.gz CD4M1_test_R2.fastq.gz case8

assert "cat case8.align.report.json | head -n 1 | jq -r .chainUsage.chains.TRA.total" "237725"
assert "cat case8.assemble.report.json | head -n 1 | jq -r .readsInClones" "199563"
assert "cat case8.align.report.json | head -n 1 | jq -r .chainUsage.chains.TRA.total" "237722"
assert "cat case8.assemble.report.json | head -n 1 | jq -r .readsInClones" "199557"
assert "cat case8.assemble.report.json | head -n 1 | jq -r .clones" "25614"
assert "cat case8.assembleContigs.report.json | head -n 1 | jq -r .longestContigLength" "227"
assert "cat case8.assembleContigs.report.json | head -n 1 | jq -r .clonesWithAmbiguousLetters" "1070"
assert "cat case8.assembleContigs.report.json | head -n 1 | jq -r .clonesWithAmbiguousLetters" "1069"
assert "cat case8.assembleContigs.report.json | head -n 1 | jq -r .assemblePrematureTerminationEvents" "3"
assert "cat case8.assembleContigs.report.json | head -n 1 | jq -r .finalCloneCount" "25614"
6 changes: 3 additions & 3 deletions itests/case009.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ mixcr analyze generic-tcr-amplicon \
-M assemble.sortBySequence=true \
CD4M1_test_R1.fastq.gz CD4M1_test_R2.fastq.gz case9

assert "cat case9.align.report.json | head -n 1 | jq -r .chainUsage.chains.TRA.total" "237725"
assert "cat case9.assemble.report.json | head -n 1 | jq -r .readsInClones" "199563"
assert "cat case9.align.report.json | head -n 1 | jq -r .chainUsage.chains.TRA.total" "237722"
assert "cat case9.assemble.report.json | head -n 1 | jq -r .readsInClones" "199557"
assert "cat case9.assembleContigs.report.json | head -n 1 | jq -r .longestContigLength" "227"
assert "cat case9.assembleContigs.report.json | head -n 1 | jq -r .clonesWithAmbiguousLetters" "1070"
assert "cat case9.assembleContigs.report.json | head -n 1 | jq -r .clonesWithAmbiguousLetters" "1069"
assert "cat case9.assembleContigs.report.json | head -n 1 | jq -r .assemblePrematureTerminationEvents" "3"
assert "cat case9.assembleContigs.report.json | head -n 1 | jq -r .finalCloneCount" "25614"
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,12 @@ public PreCloneAssemblerResult getForNextGroup() {

assert alignmentInfos.size() == assemblerInput.size();

report.inputAssemblingFeatureSequences.addAndGet(alignmentInfos.size());
report.inputAlignments.addAndGet(localIdx + 1);

if (assemblerInput.isEmpty()) {
CUtils.drainWithoutClose(alignmentsReader2.take(), DummyInputPort.INSTANCE);
report.groupsWithNoAssemblingFeature.incrementAndGet();
return new PreCloneAssemblerResult(Collections.emptyList(), null);
}

Expand Down Expand Up @@ -237,6 +239,8 @@ public PreCloneAssemblerResult getForNextGroup() {
int numberOfClones = consensuses.size();
report.clonotypes.addAndGet(numberOfClones);
report.clonotypesPerGroup.get(numberOfClones).incrementAndGet();
if (numberOfClones == 0)
report.assemblingFeatureSequencesInZeroPreClones.addAndGet(alignmentInfos.size());

// Accumulates V, J and C gene information for each consensus
// noinspection unchecked
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,16 @@
public class PreCloneAssemblerReport implements MiXCRReport {
@JsonProperty("inputGroups")
public final long inputGroups;
@JsonProperty("groupsWithNoAssemblingFeature")
public final long groupsWithNoAssemblingFeature;
@JsonProperty("inputAlignments")
public final long inputAlignments;
@JsonProperty("inputAssemblingFeatureSequences")
public final long inputAssemblingFeatureSequences;
@JsonProperty("clonotypes")
public final long clonotypes;
@JsonProperty("assemblingFeatureSequencesInZeroPreClones")
public final long assemblingFeatureSequencesInZeroPreClones;
@JsonProperty("clonotypesPerGroup")
public final Map<Integer, Long> clonotypesPerGroup;
@JsonProperty("coreAlignments")
Expand Down Expand Up @@ -60,8 +66,11 @@ public class PreCloneAssemblerReport implements MiXCRReport {
@JsonCreator
public PreCloneAssemblerReport(
@JsonProperty("inputGroups") long inputGroups,
@JsonProperty("groupsWithNoAssemblingFeature") long groupsWithNoAssemblingFeature,
@JsonProperty("inputAlignments") long inputAlignments,
@JsonProperty("inputAssemblingFeatureSequences") long inputAssemblingFeatureSequences,
@JsonProperty("clonotypes") long clonotypes,
@JsonProperty("assemblingFeatureSequencesInZeroPreClones") long assemblingFeatureSequencesInZeroPreClones,
@JsonProperty("clonotypesPerGroup") Map<Integer, Long> clonotypesPerGroup,
@JsonProperty("coreAlignments") long coreAlignments,
@JsonProperty("discardedCoreAlignments") long discardedCoreAlignments,
Expand All @@ -78,8 +87,11 @@ public PreCloneAssemblerReport(
@JsonProperty("coreAlignmentsDroppedByTagSuffix") long coreAlignmentsDroppedByTagSuffix
) {
this.inputGroups = inputGroups;
this.groupsWithNoAssemblingFeature = groupsWithNoAssemblingFeature;
this.inputAlignments = inputAlignments;
this.inputAssemblingFeatureSequences = inputAssemblingFeatureSequences;
this.clonotypes = clonotypes;
this.assemblingFeatureSequencesInZeroPreClones = assemblingFeatureSequencesInZeroPreClones;
this.clonotypesPerGroup = clonotypesPerGroup;
this.coreAlignments = coreAlignments;
this.discardedCoreAlignments = discardedCoreAlignments;
Expand All @@ -99,11 +111,17 @@ public PreCloneAssemblerReport(
@Override
public void writeReport(ReportHelper helper) {
helper.writeField("Number of input groups", inputGroups);
helper.writeField("Number of groups with no assembling feature", groupsWithNoAssemblingFeature);
helper.writeField("Number of input alignments", inputAlignments);
helper.writeField("Number of output pre-clonotypes", clonotypes);
helper.writePercentAndAbsoluteField("Number alignments with assembling feature",
inputAssemblingFeatureSequences,
inputAlignments);
helper.print("Number of clonotypes per group:");
helper.print(ReportKt.format(clonotypesPerGroup, " ",
new StringBuilder(), new FormatSettings(0.0, Integer.MAX_VALUE, 0.05)).toString());
helper.writeField("Number of assembling feature sequences in groups with zero pre-clonotypes",
assemblingFeatureSequencesInZeroPreClones);
helper.writePercentAndAbsoluteField("Number of core alignments", coreAlignments, inputAlignments);
helper.writePercentAndAbsoluteField("Discarded core alignments", discardedCoreAlignments, coreAlignments);
helper.writePercentAndAbsoluteField("Empirically assigned alignments", empiricallyAssignedAlignments, inputAlignments);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@

public final class PreCloneAssemblerReportBuilder implements ReportBuilder {
final AtomicLong inputGroups = new AtomicLong();
final AtomicLong groupsWithNoAssemblingFeature = new AtomicLong();
final AtomicLong inputAlignments = new AtomicLong();
final AtomicLong inputAssemblingFeatureSequences = new AtomicLong();
final AtomicLong clonotypes = new AtomicLong();
final AtomicLong assemblingFeatureSequencesInZeroPreClones = new AtomicLong();
final ConcurrentAtomicLongMap<Integer> clonotypesPerGroup = new ConcurrentAtomicLongMap<>();
final AtomicLong coreAlignments = new AtomicLong();
final AtomicLong discardedCoreAlignments = new AtomicLong();
Expand Down Expand Up @@ -54,8 +57,11 @@ Map<GeneType, Long> getGeneConflictsMap() {
public PreCloneAssemblerReport buildReport() {
return new PreCloneAssemblerReport(
inputGroups.get(),
groupsWithNoAssemblingFeature.get(),
inputAlignments.get(),
inputAssemblingFeatureSequences.get(),
clonotypes.get(),
assemblingFeatureSequencesInZeroPreClones.get(),
clonotypesPerGroup.getImmutable(),
coreAlignments.get(),
discardedCoreAlignments.get(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ public SecondaryReader readAlignments() {
return new SecondaryReader(false);
}

public SecondaryReader createRawSecondaryReader() {
private SecondaryReader createRawSecondaryReader() {
ensureInitialized();
return new SecondaryReader(true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public class PartialAlignmentsAssembler extends AbstractCommandReportBuilder<Par
final int kOffset;
final int minimalAssembleOverlap;
final int minimalNOverlap;
final float minimalNOverlapShare;
final boolean writePartial, overlappedOnly;
final VDJCAlignerParameters alignerParameters;
final TargetMerger targetMerger;
Expand Down Expand Up @@ -81,6 +82,7 @@ public PartialAlignmentsAssembler(PartialAlignmentsAssemblerParameters params,
this.kOffset = params.getKOffset();
this.minimalAssembleOverlap = params.getMinimalAssembleOverlap();
this.minimalNOverlap = params.getMinimalNOverlap();
this.minimalNOverlapShare = params.getMinimalNOverlapShare();
this.alignerParameters = alignerParameters;
this.targetMerger = new TargetMerger(params.getMergerParameters(), alignerParameters, params.getMinimalAlignmentMergeIdentity());
this.usedGenes = usedGenes;
Expand Down Expand Up @@ -217,8 +219,10 @@ public void searchOverlaps(OutputPort<VDJCAlignments> input) {

int actualNRegionLength = nRegion.totalLength();
int minimalN = Math.min(minimalNOverlap, actualNRegionLength);
int actualNRegionLengthInOverlap = nRegionInOverlap.totalLength();

if (nRegionInOverlap.totalLength() < minimalN) {
if (actualNRegionLengthInOverlap < minimalN ||
1.0f * actualNRegionLengthInOverlap / actualNRegionLength < minimalNOverlapShare) {
droppedSmallOverlapNRegion.incrementAndGet();
cancelCurrentResult.run();
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
getterVisibility = JsonAutoDetect.Visibility.NONE)
public class PartialAlignmentsAssemblerParameters {
private int kValue, kOffset, minimalAssembleOverlap, minimalNOverlap;
private float minimalAlignmentMergeIdentity;
private float minimalNOverlapShare, minimalAlignmentMergeIdentity;
private MergerParameters mergerParameters;
private long maxLeftParts;
private int maxLeftMatches;
Expand All @@ -36,6 +36,7 @@ public PartialAlignmentsAssemblerParameters(
@JsonProperty("kOffset") int kOffset,
@JsonProperty("minimalAssembleOverlap") int minimalAssembleOverlap,
@JsonProperty("minimalNOverlap") int minimalNOverlap,
@JsonProperty("minimalNOverlapShare") float minimalNOverlapShare,
@JsonProperty("minimalAlignmentMergeIdentity") float minimalAlignmentMergeIdentity,
@JsonProperty("mergerParameters") MergerParameters mergerParameters,
@JsonProperty("maxLeftParts") long maxLeftParts,
Expand All @@ -44,6 +45,7 @@ public PartialAlignmentsAssemblerParameters(
this.kOffset = kOffset;
this.minimalAssembleOverlap = minimalAssembleOverlap;
this.minimalNOverlap = minimalNOverlap;
this.minimalNOverlapShare = minimalNOverlapShare;
this.minimalAlignmentMergeIdentity = minimalAlignmentMergeIdentity;
this.mergerParameters = mergerParameters;
this.maxLeftParts = maxLeftParts;
Expand Down Expand Up @@ -90,6 +92,14 @@ public void setMinimalNOverlap(int minimalNOverlap) {
this.minimalNOverlap = minimalNOverlap;
}

public float getMinimalNOverlapShare() {
return minimalNOverlapShare;
}

public void setMinimalNOverlapShare(float minimalNOverlapShare) {
this.minimalNOverlapShare = minimalNOverlapShare;
}

public float getMinimalAlignmentMergeIdentity() {
return minimalAlignmentMergeIdentity;
}
Expand Down Expand Up @@ -123,6 +133,7 @@ public boolean equals(Object o) {
kOffset == that.kOffset &&
minimalAssembleOverlap == that.minimalAssembleOverlap &&
minimalNOverlap == that.minimalNOverlap &&
Float.compare(that.minimalNOverlapShare, minimalNOverlapShare) == 0 &&
Float.compare(that.minimalAlignmentMergeIdentity, minimalAlignmentMergeIdentity) == 0 &&
maxLeftParts == that.maxLeftParts &&
maxLeftMatches == that.maxLeftMatches &&
Expand All @@ -131,7 +142,7 @@ public boolean equals(Object o) {

@Override
public int hashCode() {
return Objects.hash(kValue, kOffset, minimalAssembleOverlap, minimalNOverlap, minimalAlignmentMergeIdentity, mergerParameters, maxLeftParts, maxLeftMatches);
return Objects.hash(kValue, kOffset, minimalAssembleOverlap, minimalNOverlap, minimalNOverlapShare, minimalAlignmentMergeIdentity, mergerParameters, maxLeftParts, maxLeftMatches);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import com.milaboratory.core.alignment.batch.AlignmentHit;
import com.milaboratory.core.alignment.batch.BatchAlignerWithBaseParameters;
import com.milaboratory.core.alignment.batch.BatchAlignerWithBaseWithFilter;
import com.milaboratory.core.alignment.batch.LeftmostBatchAlignerAdapter;
import com.milaboratory.core.sequence.NucleotideSequence;
import com.milaboratory.mixcr.basictypes.HasGene;
import com.milaboratory.util.BitArray;
Expand Down Expand Up @@ -168,7 +169,10 @@ protected void init() {
singleDAligner = new SingleDAligner(dAlignerParameters,
genesToAlign.get(GeneType.Diversity));
vAligner = createKAligner(GeneType.Variable);
jAligner = createKAligner(GeneType.Joining);
jAligner = new LeftmostBatchAlignerAdapter<>(
createKAligner(GeneType.Joining),
0.6f
);
cAligner = createKAligner(GeneType.Constant);

Chains chains = new Chains();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ private KVJResultsForSingle align(Target target) {
if (!vResult.hasHits())
return new KVJResultsForSingle(target,
vResult, // V result is empty
// If -OallowPartialAlignments=true try align J gene
// If -OallowPartialAlignments=true try to align J gene
parameters.getAllowPartialAlignments() ?
jAligner.align(sequence) : null);

Expand Down
Loading