Skip to content

Commit

Permalink
Merge pull request #930 from milaboratory/db-refactoring
Browse files Browse the repository at this point in the history
Refactoring & fixes
  • Loading branch information
dbolotin authored Dec 19, 2022
2 parents 4a2b862 + 227f41d commit d5ddf46
Show file tree
Hide file tree
Showing 18 changed files with 143 additions and 188 deletions.
4 changes: 2 additions & 2 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,10 @@ repositories {
}
}

val milibVersion = "2.2.0-24-master"
val milibVersion = "2.2.0-25-master"
val repseqioVersion = "1.6.0-4-master"
val miplotsVersion = "1.2.0"
val mitoolVersion = "1.5.0-20-refactoring"
val mitoolVersion = "1.5.0-26-main"
val jacksonBomVersion = "2.14.1"
val redberryPipeVersion = "1.3.0-9-master"

Expand Down
5 changes: 5 additions & 0 deletions changelogs/v4.1.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ experimental setup and study goals.
- add tags info in `exportAlignmentsPretty` and `exportClonesPretty`
- add `--chains` filter for `exportShmTrees`, `exportShmTreesWithNodes`, `exportShmTreesNewick`
and `exportPlots shmTrees` commands
- fixed old bug #353, now all aligners favor leftmost J gene in situations where multiple genes can ve found in the
sequence (i.e. mis-spliced mRNA)
- new filter and parameter added in `assemblePartial`; parameter name is `minimalNOverlapShare`, it controls minimal
relative part of N region that must be covered by the overlap to conclude that two reads are from the same V(D)J
rearrangement
- fixed NPE in `assemblePartial` executed for the data without C-gene alignment settings
- by default exports show messages like 'region_not_covered' for data that can't be extracted (requesting `-nFeature`
for not covered region or not existed tag). Option `--not-covered-as-empty` will save previous behaviour
Expand Down
1 change: 0 additions & 1 deletion itests/case003.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,3 @@ mixcr analyze generic-tcr-amplicon \
test_R1.fastq test_R2.fastq case3

[[ -f case3.clna ]] || exit 1

6 changes: 3 additions & 3 deletions itests/case008.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ mixcr analyze generic-tcr-amplicon \
--split-clones-by V --split-clones-by J --split-clones-by C \
CD4M1_test_R1.fastq.gz CD4M1_test_R2.fastq.gz case8

assert "cat case8.align.report.json | head -n 1 | jq -r .chainUsage.chains.TRA.total" "237725"
assert "cat case8.assemble.report.json | head -n 1 | jq -r .readsInClones" "199563"
assert "cat case8.align.report.json | head -n 1 | jq -r .chainUsage.chains.TRA.total" "237722"
assert "cat case8.assemble.report.json | head -n 1 | jq -r .readsInClones" "199557"
assert "cat case8.assemble.report.json | head -n 1 | jq -r .clones" "25614"
assert "cat case8.assembleContigs.report.json | head -n 1 | jq -r .longestContigLength" "227"
assert "cat case8.assembleContigs.report.json | head -n 1 | jq -r .clonesWithAmbiguousLetters" "1070"
assert "cat case8.assembleContigs.report.json | head -n 1 | jq -r .clonesWithAmbiguousLetters" "1069"
assert "cat case8.assembleContigs.report.json | head -n 1 | jq -r .assemblePrematureTerminationEvents" "3"
assert "cat case8.assembleContigs.report.json | head -n 1 | jq -r .finalCloneCount" "25614"
6 changes: 3 additions & 3 deletions itests/case009.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ mixcr analyze generic-tcr-amplicon \
-M assemble.sortBySequence=true \
CD4M1_test_R1.fastq.gz CD4M1_test_R2.fastq.gz case9

assert "cat case9.align.report.json | head -n 1 | jq -r .chainUsage.chains.TRA.total" "237725"
assert "cat case9.assemble.report.json | head -n 1 | jq -r .readsInClones" "199563"
assert "cat case9.align.report.json | head -n 1 | jq -r .chainUsage.chains.TRA.total" "237722"
assert "cat case9.assemble.report.json | head -n 1 | jq -r .readsInClones" "199557"
assert "cat case9.assembleContigs.report.json | head -n 1 | jq -r .longestContigLength" "227"
assert "cat case9.assembleContigs.report.json | head -n 1 | jq -r .clonesWithAmbiguousLetters" "1070"
assert "cat case9.assembleContigs.report.json | head -n 1 | jq -r .clonesWithAmbiguousLetters" "1069"
assert "cat case9.assembleContigs.report.json | head -n 1 | jq -r .assemblePrematureTerminationEvents" "3"
assert "cat case9.assembleContigs.report.json | head -n 1 | jq -r .finalCloneCount" "25614"
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,12 @@ public PreCloneAssemblerResult getForNextGroup() {

assert alignmentInfos.size() == assemblerInput.size();

report.inputAssemblingFeatureSequences.addAndGet(alignmentInfos.size());
report.inputAlignments.addAndGet(localIdx + 1);

if (assemblerInput.isEmpty()) {
CUtils.drainWithoutClose(alignmentsReader2.take(), DummyInputPort.INSTANCE);
report.groupsWithNoAssemblingFeature.incrementAndGet();
return new PreCloneAssemblerResult(Collections.emptyList(), null);
}

Expand Down Expand Up @@ -237,6 +239,8 @@ public PreCloneAssemblerResult getForNextGroup() {
int numberOfClones = consensuses.size();
report.clonotypes.addAndGet(numberOfClones);
report.clonotypesPerGroup.get(numberOfClones).incrementAndGet();
if (numberOfClones == 0)
report.assemblingFeatureSequencesInZeroPreClones.addAndGet(alignmentInfos.size());

// Accumulates V, J and C gene information for each consensus
// noinspection unchecked
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,16 @@
public class PreCloneAssemblerReport implements MiXCRReport {
@JsonProperty("inputGroups")
public final long inputGroups;
@JsonProperty("groupsWithNoAssemblingFeature")
public final long groupsWithNoAssemblingFeature;
@JsonProperty("inputAlignments")
public final long inputAlignments;
@JsonProperty("inputAssemblingFeatureSequences")
public final long inputAssemblingFeatureSequences;
@JsonProperty("clonotypes")
public final long clonotypes;
@JsonProperty("assemblingFeatureSequencesInZeroPreClones")
public final long assemblingFeatureSequencesInZeroPreClones;
@JsonProperty("clonotypesPerGroup")
public final Map<Integer, Long> clonotypesPerGroup;
@JsonProperty("coreAlignments")
Expand Down Expand Up @@ -60,8 +66,11 @@ public class PreCloneAssemblerReport implements MiXCRReport {
@JsonCreator
public PreCloneAssemblerReport(
@JsonProperty("inputGroups") long inputGroups,
@JsonProperty("groupsWithNoAssemblingFeature") long groupsWithNoAssemblingFeature,
@JsonProperty("inputAlignments") long inputAlignments,
@JsonProperty("inputAssemblingFeatureSequences") long inputAssemblingFeatureSequences,
@JsonProperty("clonotypes") long clonotypes,
@JsonProperty("assemblingFeatureSequencesInZeroPreClones") long assemblingFeatureSequencesInZeroPreClones,
@JsonProperty("clonotypesPerGroup") Map<Integer, Long> clonotypesPerGroup,
@JsonProperty("coreAlignments") long coreAlignments,
@JsonProperty("discardedCoreAlignments") long discardedCoreAlignments,
Expand All @@ -78,8 +87,11 @@ public PreCloneAssemblerReport(
@JsonProperty("coreAlignmentsDroppedByTagSuffix") long coreAlignmentsDroppedByTagSuffix
) {
this.inputGroups = inputGroups;
this.groupsWithNoAssemblingFeature = groupsWithNoAssemblingFeature;
this.inputAlignments = inputAlignments;
this.inputAssemblingFeatureSequences = inputAssemblingFeatureSequences;
this.clonotypes = clonotypes;
this.assemblingFeatureSequencesInZeroPreClones = assemblingFeatureSequencesInZeroPreClones;
this.clonotypesPerGroup = clonotypesPerGroup;
this.coreAlignments = coreAlignments;
this.discardedCoreAlignments = discardedCoreAlignments;
Expand All @@ -99,11 +111,17 @@ public PreCloneAssemblerReport(
@Override
public void writeReport(ReportHelper helper) {
helper.writeField("Number of input groups", inputGroups);
helper.writeField("Number of groups with no assembling feature", groupsWithNoAssemblingFeature);
helper.writeField("Number of input alignments", inputAlignments);
helper.writeField("Number of output pre-clonotypes", clonotypes);
helper.writePercentAndAbsoluteField("Number alignments with assembling feature",
inputAssemblingFeatureSequences,
inputAlignments);
helper.print("Number of clonotypes per group:");
helper.print(ReportKt.format(clonotypesPerGroup, " ",
new StringBuilder(), new FormatSettings(0.0, Integer.MAX_VALUE, 0.05)).toString());
helper.writeField("Number of assembling feature sequences in groups with zero pre-clonotypes",
assemblingFeatureSequencesInZeroPreClones);
helper.writePercentAndAbsoluteField("Number of core alignments", coreAlignments, inputAlignments);
helper.writePercentAndAbsoluteField("Discarded core alignments", discardedCoreAlignments, coreAlignments);
helper.writePercentAndAbsoluteField("Empirically assigned alignments", empiricallyAssignedAlignments, inputAlignments);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@

public final class PreCloneAssemblerReportBuilder implements ReportBuilder {
final AtomicLong inputGroups = new AtomicLong();
final AtomicLong groupsWithNoAssemblingFeature = new AtomicLong();
final AtomicLong inputAlignments = new AtomicLong();
final AtomicLong inputAssemblingFeatureSequences = new AtomicLong();
final AtomicLong clonotypes = new AtomicLong();
final AtomicLong assemblingFeatureSequencesInZeroPreClones = new AtomicLong();
final ConcurrentAtomicLongMap<Integer> clonotypesPerGroup = new ConcurrentAtomicLongMap<>();
final AtomicLong coreAlignments = new AtomicLong();
final AtomicLong discardedCoreAlignments = new AtomicLong();
Expand Down Expand Up @@ -54,8 +57,11 @@ Map<GeneType, Long> getGeneConflictsMap() {
public PreCloneAssemblerReport buildReport() {
return new PreCloneAssemblerReport(
inputGroups.get(),
groupsWithNoAssemblingFeature.get(),
inputAlignments.get(),
inputAssemblingFeatureSequences.get(),
clonotypes.get(),
assemblingFeatureSequencesInZeroPreClones.get(),
clonotypesPerGroup.getImmutable(),
coreAlignments.get(),
discardedCoreAlignments.get(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ public SecondaryReader readAlignments() {
return new SecondaryReader(false);
}

public SecondaryReader createRawSecondaryReader() {
private SecondaryReader createRawSecondaryReader() {
ensureInitialized();
return new SecondaryReader(true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public class PartialAlignmentsAssembler extends AbstractCommandReportBuilder<Par
final int kOffset;
final int minimalAssembleOverlap;
final int minimalNOverlap;
final float minimalNOverlapShare;
final boolean writePartial, overlappedOnly;
final VDJCAlignerParameters alignerParameters;
final TargetMerger targetMerger;
Expand Down Expand Up @@ -81,6 +82,7 @@ public PartialAlignmentsAssembler(PartialAlignmentsAssemblerParameters params,
this.kOffset = params.getKOffset();
this.minimalAssembleOverlap = params.getMinimalAssembleOverlap();
this.minimalNOverlap = params.getMinimalNOverlap();
this.minimalNOverlapShare = params.getMinimalNOverlapShare();
this.alignerParameters = alignerParameters;
this.targetMerger = new TargetMerger(params.getMergerParameters(), alignerParameters, params.getMinimalAlignmentMergeIdentity());
this.usedGenes = usedGenes;
Expand Down Expand Up @@ -217,8 +219,10 @@ public void searchOverlaps(OutputPort<VDJCAlignments> input) {

int actualNRegionLength = nRegion.totalLength();
int minimalN = Math.min(minimalNOverlap, actualNRegionLength);
int actualNRegionLengthInOverlap = nRegionInOverlap.totalLength();

if (nRegionInOverlap.totalLength() < minimalN) {
if (actualNRegionLengthInOverlap < minimalN ||
1.0f * actualNRegionLengthInOverlap / actualNRegionLength < minimalNOverlapShare) {
droppedSmallOverlapNRegion.incrementAndGet();
cancelCurrentResult.run();
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
getterVisibility = JsonAutoDetect.Visibility.NONE)
public class PartialAlignmentsAssemblerParameters {
private int kValue, kOffset, minimalAssembleOverlap, minimalNOverlap;
private float minimalAlignmentMergeIdentity;
private float minimalNOverlapShare, minimalAlignmentMergeIdentity;
private MergerParameters mergerParameters;
private long maxLeftParts;
private int maxLeftMatches;
Expand All @@ -36,6 +36,7 @@ public PartialAlignmentsAssemblerParameters(
@JsonProperty("kOffset") int kOffset,
@JsonProperty("minimalAssembleOverlap") int minimalAssembleOverlap,
@JsonProperty("minimalNOverlap") int minimalNOverlap,
@JsonProperty("minimalNOverlapShare") float minimalNOverlapShare,
@JsonProperty("minimalAlignmentMergeIdentity") float minimalAlignmentMergeIdentity,
@JsonProperty("mergerParameters") MergerParameters mergerParameters,
@JsonProperty("maxLeftParts") long maxLeftParts,
Expand All @@ -44,6 +45,7 @@ public PartialAlignmentsAssemblerParameters(
this.kOffset = kOffset;
this.minimalAssembleOverlap = minimalAssembleOverlap;
this.minimalNOverlap = minimalNOverlap;
this.minimalNOverlapShare = minimalNOverlapShare;
this.minimalAlignmentMergeIdentity = minimalAlignmentMergeIdentity;
this.mergerParameters = mergerParameters;
this.maxLeftParts = maxLeftParts;
Expand Down Expand Up @@ -90,6 +92,14 @@ public void setMinimalNOverlap(int minimalNOverlap) {
this.minimalNOverlap = minimalNOverlap;
}

public float getMinimalNOverlapShare() {
return minimalNOverlapShare;
}

public void setMinimalNOverlapShare(float minimalNOverlapShare) {
this.minimalNOverlapShare = minimalNOverlapShare;
}

public float getMinimalAlignmentMergeIdentity() {
return minimalAlignmentMergeIdentity;
}
Expand Down Expand Up @@ -123,6 +133,7 @@ public boolean equals(Object o) {
kOffset == that.kOffset &&
minimalAssembleOverlap == that.minimalAssembleOverlap &&
minimalNOverlap == that.minimalNOverlap &&
Float.compare(that.minimalNOverlapShare, minimalNOverlapShare) == 0 &&
Float.compare(that.minimalAlignmentMergeIdentity, minimalAlignmentMergeIdentity) == 0 &&
maxLeftParts == that.maxLeftParts &&
maxLeftMatches == that.maxLeftMatches &&
Expand All @@ -131,7 +142,7 @@ public boolean equals(Object o) {

@Override
public int hashCode() {
return Objects.hash(kValue, kOffset, minimalAssembleOverlap, minimalNOverlap, minimalAlignmentMergeIdentity, mergerParameters, maxLeftParts, maxLeftMatches);
return Objects.hash(kValue, kOffset, minimalAssembleOverlap, minimalNOverlap, minimalNOverlapShare, minimalAlignmentMergeIdentity, mergerParameters, maxLeftParts, maxLeftMatches);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import com.milaboratory.core.alignment.batch.AlignmentHit;
import com.milaboratory.core.alignment.batch.BatchAlignerWithBaseParameters;
import com.milaboratory.core.alignment.batch.BatchAlignerWithBaseWithFilter;
import com.milaboratory.core.alignment.batch.LeftmostBatchAlignerAdapter;
import com.milaboratory.core.sequence.NucleotideSequence;
import com.milaboratory.mixcr.basictypes.HasGene;
import com.milaboratory.util.BitArray;
Expand Down Expand Up @@ -168,7 +169,10 @@ protected void init() {
singleDAligner = new SingleDAligner(dAlignerParameters,
genesToAlign.get(GeneType.Diversity));
vAligner = createKAligner(GeneType.Variable);
jAligner = createKAligner(GeneType.Joining);
jAligner = new LeftmostBatchAlignerAdapter<>(
createKAligner(GeneType.Joining),
0.6f
);
cAligner = createKAligner(GeneType.Constant);

Chains chains = new Chains();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ private KVJResultsForSingle align(Target target) {
if (!vResult.hasHits())
return new KVJResultsForSingle(target,
vResult, // V result is empty
// If -OallowPartialAlignments=true try align J gene
// If -OallowPartialAlignments=true try to align J gene
parameters.getAllowPartialAlignments() ?
jAligner.align(sequence) : null);

Expand Down
Loading

0 comments on commit d5ddf46

Please sign in to comment.