From 617d5117ef04e41ddfeb44c23c1b5f4fd1df2b1a Mon Sep 17 00:00:00 2001 From: stateMachinist <190835587+stateMachinist@users.noreply.github.com> Date: Mon, 11 Aug 2025 20:20:10 +0200 Subject: [PATCH 01/15] added sparse learner --- algorithms/active/pom.xml | 1 + algorithms/active/sparse/pom.xml | 49 +++ .../sparse/AbstractSparseLearner.java | 319 ++++++++++++++++++ .../de/learnlib/algorithm/sparse/CoreRow.java | 40 +++ .../learnlib/algorithm/sparse/FringeRow.java | 37 ++ .../de/learnlib/algorithm/sparse/Leaf.java | 84 +++++ .../de/learnlib/algorithm/sparse/Node.java | 34 ++ .../de/learnlib/algorithm/sparse/Row.java | 26 ++ .../learnlib/algorithm/sparse/Separator.java | 34 ++ .../algorithm/sparse/SparseLearner.java | 38 +++ .../sparse/src/main/java/module-info.java | 39 +++ .../src/test/java/sparse/it/SparseIT.java | 33 ++ 12 files changed, 734 insertions(+) create mode 100644 algorithms/active/sparse/pom.xml create mode 100644 algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/AbstractSparseLearner.java create mode 100644 algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java create mode 100644 algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java create mode 100644 algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java create mode 100644 algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java create mode 100644 algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Row.java create mode 100644 algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java create mode 100644 algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java create mode 100644 algorithms/active/sparse/src/main/java/module-info.java create mode 100644 algorithms/active/sparse/src/test/java/sparse/it/SparseIT.java diff --git a/algorithms/active/pom.xml b/algorithms/active/pom.xml index 26f706662..7578c4742 100644 --- 
a/algorithms/active/pom.xml +++ b/algorithms/active/pom.xml @@ -43,6 +43,7 @@ limitations under the License. observation-pack observation-pack-vpa procedural + sparse ttt ttt-vpa diff --git a/algorithms/active/sparse/pom.xml b/algorithms/active/sparse/pom.xml new file mode 100644 index 000000000..6e678f3db --- /dev/null +++ b/algorithms/active/sparse/pom.xml @@ -0,0 +1,49 @@ + + + + 4.0.0 + + + de.learnlib + learnlib-algorithms-active-parent + 0.19.0-SNAPSHOT + ../pom.xml + + + learnlib-sparse + + LearnLib :: Algorithms :: Sparse + + This artifact provides the implementation of the Sparse OT learning algorithm as described in the paper "Learning Mealy Machines with Sparse Observation Tables". + + + + + de.learnlib + learnlib-util + + + de.learnlib + learnlib-counterexamples + + + de.learnlib.testsupport + learnlib-learner-it-support + + + diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/AbstractSparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/AbstractSparseLearner.java new file mode 100644 index 000000000..74cf4539b --- /dev/null +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/AbstractSparseLearner.java @@ -0,0 +1,319 @@ +/* Copyright (C) 2013-2025 TU Dortmund University + * This file is part of LearnLib . + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.learnlib.algorithm.sparse; + +import de.learnlib.algorithm.LearningAlgorithm.MealyLearner; +import de.learnlib.counterexample.LocalSuffixFinders; +import de.learnlib.oracle.MembershipOracle.MealyMembershipOracle; +import de.learnlib.query.DefaultQuery; +import de.learnlib.util.mealy.MealyUtil; +import net.automatalib.alphabet.Alphabet; +import net.automatalib.automaton.transducer.MealyMachine; +import net.automatalib.automaton.transducer.MutableMealyMachine; +import net.automatalib.common.util.Pair; +import net.automatalib.word.Word; + +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; + +abstract class AbstractSparseLearner implements MealyLearner { + + private final Alphabet alphabet; + private final MealyMembershipOracle oracle; + private final Deque> sufs; // suffixes + private final List> cRows; // core rows + private final Deque> fRows; // fringe rows + private final Map, FringeRow> prefToFringe; // fringe prefix to row + private final List, Word>> cells; // list of unique cells + private final Map, Word>, Integer> cellToIdx; // maps each unique cell to its list index + private final MutableMealyMachine hyp; // hypothesis + private final Map> stateToPrefix; // maps each state to its core row prefix + private final Function, Word> accSeq; // access sequence + + // for fast suffix ranking, we track for each suffix + // how the core rows are partitioned by it + private final Map, List> sufToVecs; + private final Map, Map, Integer>> sufToOutToIdx; + + AbstractSparseLearner(Alphabet alphabet, + MealyMembershipOracle oracle, + List> initialSuffixes, + MutableMealyMachine initialHypothesis) { + this.alphabet = alphabet; + this.oracle = oracle; + sufs = new ArrayDeque<>(initialSuffixes); + cRows = new ArrayList<>(); + fRows = new ArrayDeque<>(); + prefToFringe = new HashMap<>(); + cells = new ArrayList<>(); + cellToIdx = new HashMap<>(); + hyp = initialHypothesis; + stateToPrefix = new HashMap<>(); + accSeq 
= p -> stateToPrefix.get(hyp.getState(p)); + sufToVecs = new HashMap<>(); + sufToOutToIdx = new HashMap<>(); + } + + @Override + public MealyMachine getHypothesisModel() { + return hyp; + } + + @Override + public void startLearning() { + final S init = hyp.addInitialState(); + final CoreRow c = new CoreRow<>(Word.epsilon(), init, 0); + cRows.add(c); + sufs.forEach(s -> addSuffixToCoreRow(c, s)); + stateToPrefix.put(init, c.prefix); + extendFringe(c, init, new Leaf<>(c, 1, sufs.size(), Collections.emptyList())); + fRows.forEach(f -> query(f, Word.epsilon())); // query transition outputs + // initially, transition outputs must be queried manually, + // for later transitions, they derive from suffix queries + updateHypothesis(); + } + + @Override + public boolean refineHypothesis(DefaultQuery> q) { + final DefaultQuery> qs = MealyUtil.shortenCounterExample(hyp, q); + if (qs == null) { + return false; + } + + final int oldSize = hyp.size(); + identifyNewState(qs); + assert hyp.size() > oldSize; + updateHypothesis(); + assert hyp.size() == cRows.size(); + refineHypothesis(q); // recursively exhaust counterexample + return true; + } + + private void updateHypothesis() { + for (FringeRow f : fRows) { + classifyFringePrefix(f); + if (f.leaf == null) { + updateHypothesis(); + return; + } else { + assert f.transOut != null; + hyp.setTransition(f.srcState, f.transIn, f.leaf.cRow.state, f.transOut); + } + } + } + + private void classifyFringePrefix(FringeRow f) { + f.leaf.update(cRows, sufs.size()); + if (f.leaf.isUnsplit()) { + return; + } + + final Separator sep = f.leaf.sep; + if (sep != null) { + followNode(f, sep); + } else { + f.leaf.sep = new Separator<>(pickSuffix(f.leaf.remRows), f.leaf.remRows, f.leaf.cellsIds); + followNode(f, f.leaf.sep); + } + } + + private Word pickSuffix(BitSet remRows) { + assert remRows.length() <= cRows.size(); + Word bestSuf = sufs.getFirst(); + int bestRank = Integer.MAX_VALUE; + final BitSet vec = new BitSet(); + for (Word s : sufs) { + 
int maxOccur = 0; + int sumOccur = 0; // checksum + for (BitSet rows : sufToVecs.get(s)) { + vec.or(remRows); + vec.and(rows); + final int occur = vec.cardinality(); + maxOccur = Math.max(maxOccur, occur); + sumOccur += occur; + } + + assert sumOccur == remRows.cardinality(); + if (maxOccur < bestRank) { + // among equally ranked suffixes, pick youngest + // (mind that suffixes are stored/iterated LIFO) + bestSuf = s; + bestRank = maxOccur; + if (bestRank == 1) { // optimization: no better suffix is possible + return bestSuf; + } + } + } + + assert bestRank < remRows.cardinality(); + return bestSuf; + } + + private void followNode(FringeRow f, Node n) { + if (n instanceof Leaf) { + final Leaf l = (Leaf) n; + assert l.isUnsplit(); + f.leaf = l; + return; + } + + final Separator sep = (Separator) n; + final Word out = query(f, sep.suffix); + final int cellIdx = getUniqueCellIdx(sep.suffix, out); + final Node next = sep.branchMap.get(out); + if (next != null) { + followNode(f, next); + return; + } + + final BitSet remRows = new BitSet(); + sep.remRows.stream().filter(i -> cRows.get(i).cellIds.contains(cellIdx)).forEach(remRows::set); + final List cellIds = new ArrayList<>(sep.cellsIds); // important: copy elements! 
+ cellIds.add(cellIdx); + if (remRows.isEmpty()) { + // no compatible core prefix + f.leaf = null; + moveToCore(f, cellIds); + } else if (remRows.cardinality() == 1) { + final Leaf l = new Leaf<>(cRows.get(remRows.nextSetBit(0)), cRows.size(), sufs.size(), cellIds); + sep.branchMap.put(out, l); + followNode(f, l); + } else { + final Separator s = new Separator<>(pickSuffix(remRows), remRows, cellIds); + sep.branchMap.put(out, s); + followNode(f, s); + } + } + + private Word query(Row r, Word suf) { + final Word out = oracle.answerQuery(r.prefix.concat(suf)); + if (r instanceof FringeRow) { + final FringeRow f = (FringeRow) r; + f.transOut = out.prefix(f.prefix.length()).lastSymbol(); + } + + return out.suffix(suf.length()); + } + + /** adds suffix-output pair to index if not yet contained + * and returns a unique identifier representing the pair */ + private int getUniqueCellIdx(Word suf, Word out) { + assert suf.length() == out.length(); + final Pair, Word> cell = Pair.of(suf, out); + final int idx = cellToIdx.computeIfAbsent(cell, c -> cellToIdx.size()); + if (idx == cells.size()) { + cells.add(cell); + } + + assert cellToIdx.size() == cells.size(); + return idx; + } + + /** returns index of new core row */ + private int moveToCore(FringeRow f, List cellIds) { + assert fRows.contains(f); + fRows.remove(f); + final S state = hyp.addState(); + final CoreRow c = new CoreRow<>(f.prefix, state, cRows.size()); + stateToPrefix.put(state, c.prefix); + assert f.transOut != null; + hyp.setTransition(f.srcState, f.transIn, state, f.transOut); + for (Integer cellIdx : completeRowObservations(f, cellIds)) { + final Pair, Word> cell = this.cells.get(cellIdx); + addCellToCoreRow(c, cell.getFirst(), cell.getSecond(), cellIdx); + } + + cRows.add(c); + extendFringe(c, state, new Leaf<>()); + assert c.cellIds.size() == sufs.size(); + assert c == cRows.get(c.idx); + return c.idx; + } + + /** takes fringe row and its observations, queries the missing entries + * and returns a list 
containing the observations for all suffixes */ + private List completeRowObservations(FringeRow f, List cellIds) { + final List> sufsPresent = cellIds.stream().map(c -> this.cells.get(c).getFirst()).collect(Collectors.toList()); + final List> sufsMissing = sufs.stream().filter(s -> !sufsPresent.contains(s)).collect(Collectors.toList()); + final List cellIdsFull = new ArrayList<>(cellIds); // important: copy elements! + sufsMissing.forEach(s -> cellIdsFull.add(getUniqueCellIdx(s, query(f, s)))); + return cellIdsFull; + } + + private void extendFringe(CoreRow c, S state, Leaf leaf) { + for (I i : alphabet) { + // add missing fringe rows for new transitions + final Word prefix = c.prefix.append(i); + final FringeRow fRow = new FringeRow<>(prefix, state, leaf); + prefToFringe.put(prefix, fRow); + fRows.push(fRow); // prioritize new rows during classification + } + } + + private void identifyNewState(DefaultQuery> q) { + final Word cex = q.getInput(); + final int idxSuf = LocalSuffixFinders.findRivestSchapire(q, accSeq::apply, hyp, oracle); + final int idxSym = idxSuf - 1; + final Word u = accSeq.apply(cex.prefix(idxSym)); + final Word ui = u.append(cex.getSymbol(idxSym)); + final FringeRow f = prefToFringe.get(ui); + assert f.leaf.isUnsplit(); + final int cRowIdx = moveToCore(f, f.leaf.cellsIds); + if (f.leaf.cRow.cellIds.containsAll(cRows.get(cRowIdx).cellIds)) { + // only add new suffix if the row is not yet distinguished + addSuffixToTable(cex.subWord(idxSuf)); + } + } + + private void addSuffixToTable(Word suf) { + assert !sufs.contains(suf); + sufs.push(suf); + // this might be an extension of an existing suffix + // -> storing/iterating suffixes in LIFO order + // exploits caching when filling core rows + + // similarly, since core rows are prefix-closed, + // cache hit rate for adding suffixes is maximized + // by iterating core rows in LIFO order + for (int i = cRows.size() - 1; i >= 0; i--) { + addSuffixToCoreRow(cRows.get(i), suf); + } + } + + private void 
addSuffixToCoreRow(CoreRow c, Word suf) { + final Word out = query(c, suf); + final int cellIdx = getUniqueCellIdx(suf, out); + addCellToCoreRow(c, suf, out, cellIdx); + } + + private void addCellToCoreRow(CoreRow c, Word suf, Word out, Integer cellIdx) { + c.addSuffix(suf, out, cellIdx); + updatePartitionMap(c, suf, out); + } + + private void updatePartitionMap(CoreRow c, Word suf, Word out) { + final Map, Integer> outToIdx = sufToOutToIdx.computeIfAbsent(suf, s -> new HashMap<>()); + final int idx = outToIdx.computeIfAbsent(out, o -> outToIdx.size()); + final List vecs = sufToVecs.computeIfAbsent(suf, s -> new ArrayList<>()); + assert idx <= vecs.size(); + if (idx == vecs.size()) { + vecs.add(new BitSet()); + } + + vecs.get(idx).set(c.idx); + } +} \ No newline at end of file diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java new file mode 100644 index 000000000..a526bd347 --- /dev/null +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java @@ -0,0 +1,40 @@ +/* Copyright (C) 2013-2025 TU Dortmund University + * This file is part of LearnLib . + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.learnlib.algorithm.sparse; + +import net.automatalib.word.Word; + +import java.util.*; + +class CoreRow extends Row { + final S state; // hypothesis state associated with this row + final int idx; // index in core row list + final Map, Word> sufToOut; // maps suffixes to the outputs contained in this row + final Set cellIds; // also store identifiers of suffix-output pairs for fast compatability checking + + CoreRow(Word prefix, S state, int idx) { + super(prefix); + this.state = state; + this.idx = idx; + sufToOut = new HashMap<>(); + cellIds = new HashSet<>(); // use HashSet to enable fast containment checks + } + + void addSuffix(Word suf, Word out, int cell) { + sufToOut.put(suf, out); + cellIds.add(cell); + } +} diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java new file mode 100644 index 000000000..ad033740f --- /dev/null +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java @@ -0,0 +1,37 @@ +/* Copyright (C) 2013-2025 TU Dortmund University + * This file is part of LearnLib . + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.learnlib.algorithm.sparse; + +import net.automatalib.word.Word; + +class FringeRow extends Row { + + // each fringe row represents a hypothesis transition + final S srcState; // source state + final I transIn; // input symbol + O transOut; // output symbol (determined dynamically) + Leaf leaf; + // for compression, fringe rows do not store observations directly. + // instead, they point to some leaf in a tree encoding their classification history. + // this trick avoids redundantly storing identical observations. + + FringeRow(Word prefix, S srcState, Leaf leaf) { + super(prefix); + this.srcState = srcState; + this.transIn = prefix.lastSymbol(); + this.leaf = leaf; + } +} diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java new file mode 100644 index 000000000..322a2709d --- /dev/null +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java @@ -0,0 +1,84 @@ +/* Copyright (C) 2013-2025 TU Dortmund University + * This file is part of LearnLib . + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.learnlib.algorithm.sparse; + +import java.util.Collections; +import java.util.List; + +class Leaf extends Node { + + final CoreRow cRow; + private boolean split; + private int lastNumCRows; + private int lastNumSufs; + Separator sep; + // split leafs always remember how many core rows and suffixes + // the table contained at their last visit. + // this information is used as a logical timestamp + // to check whether the separator is still guaranteed to be optimal + // or if it needs to be recomputed. + + /** creates split leaf without observations */ + Leaf() { + super(Collections.emptyList()); + cRow = null; + split = true; + lastNumCRows = 0; + lastNumSufs = 0; + // timestamps will be updated automatically + } + + /** creates unsplit leaf associated with the given core row and observations */ + Leaf(CoreRow cRow, int numCRows, int numSufs, List cellIds) { + super(cellIds); + this.cRow = cRow; + remRows.set(cRow.idx); + split = false; + lastNumCRows = numCRows; + lastNumSufs = numSufs; + } + + boolean isUnsplit() { + return !split; + } + + void update(List> cRows, int numSufs) { + assert lastNumCRows <= cRows.size(); + if (lastNumCRows == cRows.size()) { + assert lastNumSufs == numSufs; + return; + } else if (numSufs > lastNumSufs) { + lastNumSufs = numSufs; + sep = null; + } + + // since suffixes and core rows grow monotonically, + // the separator only needs to be recomputed whenever + // new compatible core prefixes emerge + // or when the suffix set grows. 
+ + for (int i = lastNumCRows; i < cRows.size(); i++) { + final CoreRow c = cRows.get(i); + if (c.cellIds.containsAll(cellsIds)) { + remRows.set(c.idx); + split = true; + sep = null; + } + } + + lastNumCRows = cRows.size(); + } +} diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java new file mode 100644 index 000000000..455505620 --- /dev/null +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java @@ -0,0 +1,34 @@ +/* Copyright (C) 2013-2025 TU Dortmund University + * This file is part of LearnLib . + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.learnlib.algorithm.sparse; + +import java.util.BitSet; +import java.util.List; + +abstract class Node { // type parameters required for safe casting + + final List cellsIds; // cell identifiers of the fringe rows at this node + final BitSet remRows; + + Node(List cellsIds) { + this(cellsIds, new BitSet()); + } + + Node(List cellsIds, BitSet remRows) { + this.cellsIds = cellsIds; + this.remRows = remRows; + } +} diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Row.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Row.java new file mode 100644 index 000000000..1ad9c082d --- /dev/null +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Row.java @@ -0,0 +1,26 @@ +/* Copyright (C) 2013-2025 TU Dortmund University + * This file is part of LearnLib . + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.learnlib.algorithm.sparse; + +import net.automatalib.word.Word; + +abstract class Row { // type parameters required for safe casting + final Word prefix; + + Row(Word prefix) { + this.prefix = prefix; + } +} diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java new file mode 100644 index 000000000..1a278d4dd --- /dev/null +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java @@ -0,0 +1,34 @@ +/* Copyright (C) 2013-2025 TU Dortmund University + * This file is part of LearnLib . + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.learnlib.algorithm.sparse; + +import net.automatalib.word.Word; + +import java.util.BitSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +class Separator extends Node { + final Word suffix; + final Map, Node> branchMap; + + Separator(Word suffix, BitSet remRows, List cells) { + super(cells, remRows); + this.suffix = suffix; + branchMap = new HashMap<>(); + } +} diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java new file mode 100644 index 000000000..585f4038e --- /dev/null +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java @@ -0,0 +1,38 @@ +/* Copyright (C) 2013-2025 TU Dortmund University + * This file is part of LearnLib . + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package de.learnlib.algorithm.sparse; + +import de.learnlib.oracle.MembershipOracle.MealyMembershipOracle; +import net.automatalib.alphabet.Alphabet; +import net.automatalib.automaton.transducer.impl.CompactMealy; +import net.automatalib.word.Word; + +import java.util.*; + +/** + * optimized implementation of the Ls learning algorithm, + * as described in the appendix of the paper + */ +public class SparseLearner extends AbstractSparseLearner { + + public SparseLearner(Alphabet alphabet, MealyMembershipOracle oracle) { + this(alphabet, oracle, Collections.emptyList()); + } + + public SparseLearner(Alphabet alphabet, MealyMembershipOracle oracle, List> initialSuffixes) { + super(alphabet, oracle, initialSuffixes, new CompactMealy<>(alphabet)); + } +} \ No newline at end of file diff --git a/algorithms/active/sparse/src/main/java/module-info.java b/algorithms/active/sparse/src/main/java/module-info.java new file mode 100644 index 000000000..d59599e07 --- /dev/null +++ b/algorithms/active/sparse/src/main/java/module-info.java @@ -0,0 +1,39 @@ +/* Copyright (C) 2013-2025 TU Dortmund University + * This file is part of LearnLib . + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This module provides the implementation of the Sparse OT learning algorithm as described in the paper "Learning Mealy Machines with Sparse Observation Tables". + *

+ * This module is provided by the following Maven dependency:
+ * <pre>
+ * &lt;dependency&gt;
+ *   &lt;groupId&gt;de.learnlib&lt;/groupId&gt;
+ *   &lt;artifactId&gt;learnlib-sparse&lt;/artifactId&gt;
+ *   &lt;version&gt;${version}&lt;/version&gt;
+ * &lt;/dependency&gt;
+ * </pre>
+ */ +open module de.learnlib.algorithm.sparse { + + requires de.learnlib.common.counterexample; + requires de.learnlib.common.util; + requires de.learnlib.api; + requires net.automatalib.core; + requires net.automatalib.api; + requires net.automatalib.common.util; + + exports de.learnlib.algorithm.sparse; +} diff --git a/algorithms/active/sparse/src/test/java/sparse/it/SparseIT.java b/algorithms/active/sparse/src/test/java/sparse/it/SparseIT.java new file mode 100644 index 000000000..c708097a3 --- /dev/null +++ b/algorithms/active/sparse/src/test/java/sparse/it/SparseIT.java @@ -0,0 +1,33 @@ +/* Copyright (C) 2013-2025 TU Dortmund University + * This file is part of LearnLib . + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package sparse.it; + +import de.learnlib.algorithm.sparse.SparseLearner; +import de.learnlib.oracle.MembershipOracle.MealyMembershipOracle; +import de.learnlib.testsupport.it.learner.AbstractMealyLearnerIT; +import de.learnlib.testsupport.it.learner.LearnerVariantList; +import net.automatalib.alphabet.Alphabet; + +public class SparseIT extends AbstractMealyLearnerIT { + + @Override + protected void addLearnerVariants(Alphabet alphabet, + int targetSize, + MealyMembershipOracle mqOracle, + LearnerVariantList.MealyLearnerVariantList variants) { + variants.addLearnerVariant("sparse", new SparseLearner<>(alphabet, mqOracle)); + } +} From d43221e4dbcabcf9421b1258f1f46485b1ecc0b7 Mon Sep 17 00:00:00 2001 From: stateMachinist <190835587+stateMachinist@users.noreply.github.com> Date: Mon, 11 Aug 2025 20:30:43 +0200 Subject: [PATCH 02/15] fixed comment --- .../main/java/de/learnlib/algorithm/sparse/SparseLearner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java index 585f4038e..4f7ff2630 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java @@ -24,7 +24,7 @@ /** * optimized implementation of the Ls learning algorithm, - * as described in the appendix of the paper + * as described in section 6 of the paper "Learning Mealy Machines with Sparse Observation Tables" */ public class SparseLearner extends AbstractSparseLearner { From dbeac250a506c9112709b8b0dd9ccf2b068ac31e Mon Sep 17 00:00:00 2001 From: stateMachinist <190835587+stateMachinist@users.noreply.github.com> Date: Wed, 3 Sep 2025 16:23:09 +0200 Subject: [PATCH 03/15] adjustments for code style --- algorithms/active/sparse/pom.xml | 16 +++++++ 
.../de/learnlib/algorithm/sparse/CoreRow.java | 7 +++- ...Learner.java => GenericSparseLearner.java} | 42 ++++++++++++------- .../de/learnlib/algorithm/sparse/Leaf.java | 4 +- .../de/learnlib/algorithm/sparse/Node.java | 6 +-- .../de/learnlib/algorithm/sparse/Row.java | 4 +- .../learnlib/algorithm/sparse/Separator.java | 4 +- .../algorithm/sparse/SparseLearner.java | 11 ++--- 8 files changed, 62 insertions(+), 32 deletions(-) rename algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/{AbstractSparseLearner.java => GenericSparseLearner.java} (92%) diff --git a/algorithms/active/sparse/pom.xml b/algorithms/active/sparse/pom.xml index 6e678f3db..24385d987 100644 --- a/algorithms/active/sparse/pom.xml +++ b/algorithms/active/sparse/pom.xml @@ -33,6 +33,22 @@ limitations under the License. + + net.automatalib + automata-api + + + net.automatalib + automata-commons-util + + + net.automatalib + automata-core + + + de.learnlib + learnlib-api + de.learnlib learnlib-util diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java index a526bd347..52816609e 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java @@ -15,9 +15,12 @@ */ package de.learnlib.algorithm.sparse; -import net.automatalib.word.Word; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; -import java.util.*; +import net.automatalib.word.Word; class CoreRow extends Row { final S state; // hypothesis state associated with this row diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/AbstractSparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java similarity index 92% rename from 
algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/AbstractSparseLearner.java rename to algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java index 74cf4539b..444020d8d 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/AbstractSparseLearner.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java @@ -15,6 +15,17 @@ */ package de.learnlib.algorithm.sparse; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + import de.learnlib.algorithm.LearningAlgorithm.MealyLearner; import de.learnlib.counterexample.LocalSuffixFinders; import de.learnlib.oracle.MembershipOracle.MealyMembershipOracle; @@ -26,11 +37,7 @@ import net.automatalib.common.util.Pair; import net.automatalib.word.Word; -import java.util.*; -import java.util.function.Function; -import java.util.stream.Collectors; - -abstract class AbstractSparseLearner implements MealyLearner { +class GenericSparseLearner implements MealyLearner { private final Alphabet alphabet; private final MealyMembershipOracle oracle; @@ -49,10 +56,11 @@ abstract class AbstractSparseLearner implements MealyLearner { private final Map, List> sufToVecs; private final Map, Map, Integer>> sufToOutToIdx; - AbstractSparseLearner(Alphabet alphabet, - MealyMembershipOracle oracle, - List> initialSuffixes, - MutableMealyMachine initialHypothesis) { + protected GenericSparseLearner(Alphabet alphabet, + MealyMembershipOracle oracle, + List> initialSuffixes, + MutableMealyMachine emptyMachine) { + assert emptyMachine.size() == 0; this.alphabet = alphabet; this.oracle = oracle; sufs = new ArrayDeque<>(initialSuffixes); @@ -61,7 +69,7 @@ abstract class AbstractSparseLearner 
implements MealyLearner { prefToFringe = new HashMap<>(); cells = new ArrayList<>(); cellToIdx = new HashMap<>(); - hyp = initialHypothesis; + hyp = emptyMachine; stateToPrefix = new HashMap<>(); accSeq = p -> stateToPrefix.get(hyp.getState(p)); sufToVecs = new HashMap<>(); @@ -209,8 +217,9 @@ private Word query(Row r, Word suf) { return out.suffix(suf.length()); } - /** adds suffix-output pair to index if not yet contained - * and returns a unique identifier representing the pair */ + /** + * adds suffix-output pair to index if not yet contained + * and returns a unique identifier representing the pair. */ private int getUniqueCellIdx(Word suf, Word out) { assert suf.length() == out.length(); final Pair, Word> cell = Pair.of(suf, out); @@ -223,7 +232,7 @@ private int getUniqueCellIdx(Word suf, Word out) { return idx; } - /** returns index of new core row */ + /** returns index of new core row. */ private int moveToCore(FringeRow f, List cellIds) { assert fRows.contains(f); fRows.remove(f); @@ -244,8 +253,9 @@ private int moveToCore(FringeRow f, List cellIds) { return c.idx; } - /** takes fringe row and its observations, queries the missing entries - * and returns a list containing the observations for all suffixes */ + /** + * takes fringe row and its observations, queries the missing entries + * and returns a list containing the observations for all suffixes. 
*/ private List completeRowObservations(FringeRow f, List cellIds) { final List> sufsPresent = cellIds.stream().map(c -> this.cells.get(c).getFirst()).collect(Collectors.toList()); final List> sufsMissing = sufs.stream().filter(s -> !sufsPresent.contains(s)).collect(Collectors.toList()); @@ -316,4 +326,4 @@ private void updatePartitionMap(CoreRow c, Word suf, Word out) { vecs.get(idx).set(c.idx); } -} \ No newline at end of file +} diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java index 322a2709d..10a8e1735 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java @@ -31,7 +31,7 @@ class Leaf extends Node { // to check whether the separator is still guaranteed to be optimal // or if it needs to be recomputed. - /** creates split leaf without observations */ + /** creates split leaf without observations. */ Leaf() { super(Collections.emptyList()); cRow = null; @@ -41,7 +41,7 @@ class Leaf extends Node { // timestamps will be updated automatically } - /** creates unsplit leaf associated with the given core row and observations */ + /** creates unsplit leaf associated with the given core row and observations. 
*/ Leaf(CoreRow cRow, int numCRows, int numSufs, List cellIds) { super(cellIds); this.cRow = cRow; diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java index 455505620..0ad49b557 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java @@ -18,16 +18,16 @@ import java.util.BitSet; import java.util.List; -abstract class Node { // type parameters required for safe casting +class Node { // type parameters required for safe casting final List cellsIds; // cell identifiers of the fringe rows at this node final BitSet remRows; - Node(List cellsIds) { + protected Node(List cellsIds) { this(cellsIds, new BitSet()); } - Node(List cellsIds, BitSet remRows) { + protected Node(List cellsIds, BitSet remRows) { this.cellsIds = cellsIds; this.remRows = remRows; } diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Row.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Row.java index 1ad9c082d..26ee62442 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Row.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Row.java @@ -17,10 +17,10 @@ import net.automatalib.word.Word; -abstract class Row { // type parameters required for safe casting +class Row { // type parameters required for safe casting final Word prefix; - Row(Word prefix) { + protected Row(Word prefix) { this.prefix = prefix; } } diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java index 1a278d4dd..61a5f2705 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java +++ 
b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java @@ -15,13 +15,13 @@ */ package de.learnlib.algorithm.sparse; -import net.automatalib.word.Word; - import java.util.BitSet; import java.util.HashMap; import java.util.List; import java.util.Map; +import net.automatalib.word.Word; + class Separator extends Node { final Word suffix; final Map, Node> branchMap; diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java index 4f7ff2630..6412190e9 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java @@ -15,18 +15,19 @@ */ package de.learnlib.algorithm.sparse; +import java.util.Collections; +import java.util.List; + import de.learnlib.oracle.MembershipOracle.MealyMembershipOracle; import net.automatalib.alphabet.Alphabet; import net.automatalib.automaton.transducer.impl.CompactMealy; import net.automatalib.word.Word; -import java.util.*; - /** * optimized implementation of the Ls learning algorithm, - * as described in section 6 of the paper "Learning Mealy Machines with Sparse Observation Tables" + * as described in section 6 of the paper "Learning Mealy Machines with Sparse Observation Tables". 
*/ -public class SparseLearner extends AbstractSparseLearner { +public class SparseLearner extends GenericSparseLearner { public SparseLearner(Alphabet alphabet, MealyMembershipOracle oracle) { this(alphabet, oracle, Collections.emptyList()); @@ -35,4 +36,4 @@ public SparseLearner(Alphabet alphabet, MealyMembershipOracle oracle) { public SparseLearner(Alphabet alphabet, MealyMembershipOracle oracle, List> initialSuffixes) { super(alphabet, oracle, initialSuffixes, new CompactMealy<>(alphabet)); } -} \ No newline at end of file +} From 13e7214ba804c5d833052620ec275a950d95c65e Mon Sep 17 00:00:00 2001 From: stateMachinist <190835587+stateMachinist@users.noreply.github.com> Date: Wed, 3 Sep 2025 21:12:17 +0200 Subject: [PATCH 04/15] extended contributor list --- pom.xml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pom.xml b/pom.xml index 6245bdfce..4c88da799 100644 --- a/pom.xml +++ b/pom.xml @@ -153,6 +153,15 @@ limitations under the License. Developer + + Wolffhardt Schwabe + schwabe@tu-berlin.de + TU Berlin, Software and Embedded Systems Engineering + https://tu.berlin/sese/ + + Developer + + - net.automatalib - automata-api + de.learnlib + learnlib-api - net.automatalib - automata-commons-util + de.learnlib + learnlib-counterexamples - net.automatalib - automata-core + de.learnlib + learnlib-util + + - de.learnlib - learnlib-api + net.automatalib + automata-api - de.learnlib - learnlib-util + net.automatalib + automata-commons-util - de.learnlib - learnlib-counterexamples + net.automatalib + automata-core + + de.learnlib.testsupport learnlib-learner-it-support diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java index 52816609e..211ea8ba7 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java @@ -23,17 
+23,30 @@ import net.automatalib.word.Word; class CoreRow extends Row { - final S state; // hypothesis state associated with this row - final int idx; // index in core row list - final Map, Word> sufToOut; // maps suffixes to the outputs contained in this row - final Set cellIds; // also store identifiers of suffix-output pairs for fast compatability checking + + /** + * Hypothesis state associated with this row. + */ + final S state; + /** + * Index in core row list. + */ + final int idx; + /** + * Maps suffixes to the outputs contained in this row. + */ + final Map, Word> sufToOut; + /** + * Also store identifiers of suffix-output pairs for fast compatibility checking. + */ + final Set cellIds; CoreRow(Word prefix, S state, int idx) { super(prefix); this.state = state; this.idx = idx; sufToOut = new HashMap<>(); - cellIds = new HashSet<>(); // use HashSet to enable fast containment checks + cellIds = new HashSet<>(); } void addSuffix(Word suf, Word out, int cell) { diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java index ad033740f..f823d6005 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java @@ -17,16 +17,35 @@ import net.automatalib.word.Word; +/** + * Each fringe row represents a hypothesis transition. + * + * @param + * state type + * @param + * input symbol type + * @param + * output symbol type + */ class FringeRow extends Row { - // each fringe row represents a hypothesis transition - final S srcState; // source state - final I transIn; // input symbol - O transOut; // output symbol (determined dynamically) + /** + * Source state. + */ + final S srcState; + /** + * Input symbol. + */ + final I transIn; + /** + * Output symbol (determined dynamically). 
+ */ + O transOut; + /** + * For compression, fringe rows do not store observations directly. instead, they point to some leaf in a tree + * encoding their classification history. this trick avoids redundantly storing identical observations. + */ Leaf leaf; - // for compression, fringe rows do not store observations directly. - // instead, they point to some leaf in a tree encoding their classification history. - // this trick avoids redundantly storing identical observations. FringeRow(Word prefix, S srcState, Leaf leaf) { super(prefix); diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java index 444020d8d..a2eec9838 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java @@ -21,10 +21,11 @@ import java.util.Collections; import java.util.Deque; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Function; -import java.util.stream.Collectors; import de.learnlib.algorithm.LearningAlgorithm.MealyLearner; import de.learnlib.counterexample.LocalSuffixFinders; @@ -34,6 +35,7 @@ import net.automatalib.alphabet.Alphabet; import net.automatalib.automaton.transducer.MealyMachine; import net.automatalib.automaton.transducer.MutableMealyMachine; +import net.automatalib.common.util.HashUtil; import net.automatalib.common.util.Pair; import net.automatalib.word.Word; @@ -41,26 +43,56 @@ class GenericSparseLearner implements MealyLearner { private final Alphabet alphabet; private final MealyMembershipOracle oracle; - private final Deque> sufs; // suffixes - private final List> cRows; // core rows - private final Deque> fRows; // fringe rows - private final Map, FringeRow> prefToFringe; // 
fringe prefix to row - private final List, Word>> cells; // list of unique cells - private final Map, Word>, Integer> cellToIdx; // maps each unique cell to its list index - private final MutableMealyMachine hyp; // hypothesis - private final Map> stateToPrefix; // maps each state to its core row prefix - private final Function, Word> accSeq; // access sequence - - // for fast suffix ranking, we track for each suffix - // how the core rows are partitioned by it + /** + * Suffixes. + */ + private final Deque> sufs; + /** + * Core rows. + */ + private final List> cRows; + /** + * Fringe rows. + */ + private final Deque> fRows; + /** + * Fringe prefix to row. + */ + private final Map, FringeRow> prefToFringe; + /** + * List of unique cells. + */ + private final List, Word>> cells; + /** + * Maps each unique cell to its list index. + */ + private final Map, Word>, Integer> cellToIdx; + /** + * Hypothesis. + */ + private final MutableMealyMachine hyp; + /** + * Maps each state to its core row prefix. + */ + private final Map> stateToPrefix; + /** + * Access sequences. + */ + private final Function, Word> accSeq; + + /** + * For fast suffix ranking, we track for each suffix how the core rows are partitioned by it. + */ private final Map, List> sufToVecs; + /** + * See {@link #sufToVecs}. 
+ */ private final Map, Map, Integer>> sufToOutToIdx; protected GenericSparseLearner(Alphabet alphabet, MealyMembershipOracle oracle, List> initialSuffixes, MutableMealyMachine emptyMachine) { - assert emptyMachine.size() == 0; this.alphabet = alphabet; this.oracle = oracle; sufs = new ArrayDeque<>(initialSuffixes); @@ -90,8 +122,7 @@ public void startLearning() { stateToPrefix.put(init, c.prefix); extendFringe(c, init, new Leaf<>(c, 1, sufs.size(), Collections.emptyList())); fRows.forEach(f -> query(f, Word.epsilon())); // query transition outputs - // initially, transition outputs must be queried manually, - // for later transitions, they derive from suffix queries + // initially, transition outputs must be queried manually for later transitions, they derive from suffix queries updateHypothesis(); } @@ -157,8 +188,7 @@ private Word pickSuffix(BitSet remRows) { assert sumOccur == remRows.cardinality(); if (maxOccur < bestRank) { - // among equally ranked suffixes, pick youngest - // (mind that suffixes are stored/iterated LIFO) + // among equally ranked suffixes, pick youngest (mind that suffixes are stored/iterated LIFO) bestSuf = s; bestRank = maxOccur; if (bestRank == 1) { // optimization: no better suffix is possible @@ -189,11 +219,15 @@ private void followNode(FringeRow f, Node n) { } final BitSet remRows = new BitSet(); - sep.remRows.stream().filter(i -> cRows.get(i).cellIds.contains(cellIdx)).forEach(remRows::set); - final List cellIds = new ArrayList<>(sep.cellsIds); // important: copy elements! 
+ for (int i = sep.remRows.nextSetBit(0); i >= 0; i = sep.remRows.nextSetBit(i + 1)) { + if (cRows.get(i).cellIds.contains(cellIdx)) { + remRows.set(i); + } + } + final List cellIds = new ArrayList<>(sep.cellsIds.size() + 1); + cellIds.addAll(sep.cellsIds); cellIds.add(cellIdx); - if (remRows.isEmpty()) { - // no compatible core prefix + if (remRows.isEmpty()) { // no compatible core prefix f.leaf = null; moveToCore(f, cellIds); } else if (remRows.cardinality() == 1) { @@ -218,8 +252,8 @@ private Word query(Row r, Word suf) { } /** - * adds suffix-output pair to index if not yet contained - * and returns a unique identifier representing the pair. */ + * Adds suffix-output pair to index if not yet contained and returns a unique identifier representing the pair. + */ private int getUniqueCellIdx(Word suf, Word out) { assert suf.length() == out.length(); final Pair, Word> cell = Pair.of(suf, out); @@ -232,10 +266,12 @@ private int getUniqueCellIdx(Word suf, Word out) { return idx; } - /** returns index of new core row. */ + /** + * Returns index of new core row. + */ private int moveToCore(FringeRow f, List cellIds) { - assert fRows.contains(f); - fRows.remove(f); + boolean removed = fRows.remove(f); + assert removed; final S state = hyp.addState(); final CoreRow c = new CoreRow<>(f.prefix, state, cRows.size()); stateToPrefix.put(state, c.prefix); @@ -254,13 +290,25 @@ private int moveToCore(FringeRow f, List cellIds) { } /** - * takes fringe row and its observations, queries the missing entries - * and returns a list containing the observations for all suffixes. */ + * Takes fringe row and its observations, Queries the missing entries and returns a list containing the observations + * for all suffixes. 
+ */ private List completeRowObservations(FringeRow f, List cellIds) { - final List> sufsPresent = cellIds.stream().map(c -> this.cells.get(c).getFirst()).collect(Collectors.toList()); - final List> sufsMissing = sufs.stream().filter(s -> !sufsPresent.contains(s)).collect(Collectors.toList()); - final List cellIdsFull = new ArrayList<>(cellIds); // important: copy elements! - sufsMissing.forEach(s -> cellIdsFull.add(getUniqueCellIdx(s, query(f, s)))); + final Set> sufsPresent = new HashSet<>(HashUtil.capacity(cellIds.size())); + for (Integer id : cellIds) { + sufsPresent.add(this.cells.get(id).getFirst()); + } + final List> sufsMissing = new ArrayList<>(sufs.size()); + for (Word s : sufs) { + if (!sufsPresent.contains(s)) { + sufsMissing.add(s); + } + } + final List cellIdsFull = new ArrayList<>(cellIds.size() + sufsMissing.size()); + cellIdsFull.addAll(cellIds); + for (Word s : sufsMissing) { + cellIdsFull.add(getUniqueCellIdx(s, query(f, s))); + } return cellIdsFull; } @@ -292,13 +340,11 @@ private void identifyNewState(DefaultQuery> q) { private void addSuffixToTable(Word suf) { assert !sufs.contains(suf); sufs.push(suf); - // this might be an extension of an existing suffix - // -> storing/iterating suffixes in LIFO order - // exploits caching when filling core rows - - // similarly, since core rows are prefix-closed, - // cache hit rate for adding suffixes is maximized - // by iterating core rows in LIFO order + /* + * This might be an extension of an existing suffix -> storing/iterating suffixes in LIFO order exploits caching + * when filling core rows. 
Similarly, since core rows are prefix-closed, cache hit rate for adding suffixes is + * maximized by iterating core rows in LIFO order + */ for (int i = cRows.size() - 1; i >= 0; i--) { addSuffixToCoreRow(cRows.get(i), suf); } diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java index 10a8e1735..bd7dd3334 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java @@ -24,14 +24,17 @@ class Leaf extends Node { private boolean split; private int lastNumCRows; private int lastNumSufs; + + /** + * Split leafs always remember how many core rows and suffixes the table contained at their last visit. Tis + * information is used as a logical timestamp to check whether the separator is still guaranteed to be optimal or if + * it needs to be recomputed. + */ Separator sep; - // split leafs always remember how many core rows and suffixes - // the table contained at their last visit. - // this information is used as a logical timestamp - // to check whether the separator is still guaranteed to be optimal - // or if it needs to be recomputed. - /** creates split leaf without observations. */ + /** + * Creates split leaf without observations. + */ Leaf() { super(Collections.emptyList()); cRow = null; @@ -41,7 +44,9 @@ class Leaf extends Node { // timestamps will be updated automatically } - /** creates unsplit leaf associated with the given core row and observations. */ + /** + * Creates unsplit leaf associated with the given core row and observations. 
+ */ Leaf(CoreRow cRow, int numCRows, int numSufs, List cellIds) { super(cellIds); this.cRow = cRow; @@ -65,10 +70,10 @@ void update(List> cRows, int numSufs) { sep = null; } - // since suffixes and core rows grow monotonically, - // the separator only needs to be recomputed whenever - // new compatible core prefixes emerge - // or when the suffix set grows. + /* + * since suffixes and core rows grow monotonically, the separator only needs to be recomputed whenever new + * compatible core prefixes emerge or when the suffix set grows. + */ for (int i = lastNumCRows; i < cRows.size(); i++) { final CoreRow c = cRows.get(i); diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java index 0ad49b557..c7f18039b 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java @@ -20,7 +20,10 @@ class Node { // type parameters required for safe casting - final List cellsIds; // cell identifiers of the fringe rows at this node + /** + * Cell identifiers of the fringe rows at this node. + */ + final List cellsIds; final BitSet remRows; protected Node(List cellsIds) { diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java index 6412190e9..41873ea06 100644 --- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java +++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java @@ -24,8 +24,8 @@ import net.automatalib.word.Word; /** - * optimized implementation of the Ls learning algorithm, - * as described in section 6 of the paper "Learning Mealy Machines with Sparse Observation Tables". 
+ * Optimized implementation of the Ls learning algorithm, as described in the paper Learning Mealy + * Machines with Sparse Observation Tables by Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner. */ public class SparseLearner extends GenericSparseLearner { diff --git a/algorithms/active/sparse/src/main/java/module-info.java b/algorithms/active/sparse/src/main/java/module-info.java index d59599e07..0d964d4a0 100644 --- a/algorithms/active/sparse/src/main/java/module-info.java +++ b/algorithms/active/sparse/src/main/java/module-info.java @@ -15,7 +15,9 @@ */ /** - * This module provides the implementation of the Sparse OT learning algorithm as described in the paper "Learning Mealy Machines with Sparse Observation Tables". + * This module provides the implementation of the Sparse OT learning algorithm as described in the paper Learning Mealy Machines with Sparse Observation Tables by Wolffhardt Schwabe, Paul Kogel, and Sabine + * Glesner. *

* This module is provided by the following Maven dependency: *

diff --git a/distribution/pom.xml b/distribution/pom.xml
index aa0c421c8..7f41f7d72 100644
--- a/distribution/pom.xml
+++ b/distribution/pom.xml
@@ -102,6 +102,11 @@ limitations under the License.
             learnlib-procedural
         
 
+        
+            de.learnlib
+            learnlib-sparse
+        
+
         
             de.learnlib
             learnlib-ttt
@@ -354,6 +359,13 @@ limitations under the License.
                     sources
                 
 
+                
+                    de.learnlib
+                    learnlib-sparse
+                    ${project.version}
+                    sources
+                
+
                 
                     de.learnlib
                     learnlib-ttt
diff --git a/pom.xml b/pom.xml
index 4c88da799..3fcf1f368 100644
--- a/pom.xml
+++ b/pom.xml
@@ -354,6 +354,11 @@ limitations under the License.
                 learnlib-procedural
                 ${project.version}
             
+            
+                de.learnlib
+                learnlib-sparse
+                ${project.version}
+            
             
                 de.learnlib
                 learnlib-ttt

From 649d54fdd1cb156f3558d710b04ce26ea71d70a9 Mon Sep 17 00:00:00 2001
From: Markus Frohme 
Date: Sat, 20 Sep 2025 20:27:40 +0200
Subject: [PATCH 08/15] some nullability augmentations

---
 algorithms/active/sparse/pom.xml                            | 5 +++++
 .../src/main/java/de/learnlib/algorithm/sparse/Leaf.java    | 6 ++++--
 algorithms/active/sparse/src/main/java/module-info.java     | 3 +++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/algorithms/active/sparse/pom.xml b/algorithms/active/sparse/pom.xml
index d1c6fbb15..95294f065 100644
--- a/algorithms/active/sparse/pom.xml
+++ b/algorithms/active/sparse/pom.xml
@@ -63,6 +63,11 @@ limitations under the License.
             automata-core
         
 
+        
+            org.checkerframework
+            checker-qual
+        
+
         
         
             de.learnlib.testsupport
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
index bd7dd3334..f417a4c20 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
@@ -18,9 +18,11 @@
 import java.util.Collections;
 import java.util.List;
 
+import org.checkerframework.checker.nullness.qual.Nullable;
+
 class Leaf extends Node {
 
-    final CoreRow cRow;
+    final @Nullable CoreRow cRow;
     private boolean split;
     private int lastNumCRows;
     private int lastNumSufs;
@@ -30,7 +32,7 @@ class Leaf extends Node {
      * information is used as a logical timestamp to check whether the separator is still guaranteed to be optimal or if
      * it needs to be recomputed.
      */
-    Separator sep;
+    @Nullable Separator sep;
 
     /**
      * Creates split leaf without observations.
diff --git a/algorithms/active/sparse/src/main/java/module-info.java b/algorithms/active/sparse/src/main/java/module-info.java
index 0d964d4a0..4b3c08d1a 100644
--- a/algorithms/active/sparse/src/main/java/module-info.java
+++ b/algorithms/active/sparse/src/main/java/module-info.java
@@ -37,5 +37,8 @@
     requires net.automatalib.api;
     requires net.automatalib.common.util;
 
+    // annotations are 'provided'-scoped and do not need to be loaded at runtime
+    requires static org.checkerframework.checker.qual;
+
     exports de.learnlib.algorithm.sparse;
 }

From 70e1afba359770c29249791d3f39596291b38e62 Mon Sep 17 00:00:00 2001
From: Markus Frohme 
Date: Sun, 21 Sep 2025 00:44:31 +0200
Subject: [PATCH 09/15] more cleanups

---
 .../src/main/java/de/learnlib/algorithm/sparse/FringeRow.java | 4 ++--
 .../de/learnlib/algorithm/sparse/GenericSparseLearner.java    | 4 ++--
 .../src/main/java/de/learnlib/algorithm/sparse/Leaf.java      | 2 +-
 .../java/{ => de/learnlib/algorithm}/sparse/it/SparseIT.java  | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)
 rename algorithms/active/sparse/src/test/java/{ => de/learnlib/algorithm}/sparse/it/SparseIT.java (97%)

diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java
index f823d6005..d91994a7c 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java
@@ -42,8 +42,8 @@ class FringeRow extends Row {
      */
     O transOut;
     /**
-     * For compression, fringe rows do not store observations directly. instead, they point to some leaf in a tree
-     * encoding their classification history. this trick avoids redundantly storing identical observations.
+     * For compression, fringe rows do not store observations directly. Instead, they point to some leaf in a tree
+     * encoding their classification history. This trick avoids redundantly storing identical observations.
      */
     Leaf leaf;
 
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java
index a2eec9838..7eb094c6d 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java
@@ -290,8 +290,8 @@ private int moveToCore(FringeRow f, List cellIds) {
     }
 
     /**
-     * Takes fringe row and its observations, Queries the missing entries and returns a list containing the observations
-     * for all suffixes.
+     * Takes fringe row and its observations, queries the missing entries, and returns a list containing the
+     * observations for all suffixes.
      */
     private List completeRowObservations(FringeRow f, List cellIds) {
         final Set> sufsPresent = new HashSet<>(HashUtil.capacity(cellIds.size()));
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
index f417a4c20..90e41332f 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
@@ -28,7 +28,7 @@ class Leaf extends Node {
     private int lastNumSufs;
 
     /**
-     * Split leafs always remember how many core rows and suffixes the table contained at their last visit. Tis
+     * Split leafs always remember how many core rows and suffixes the table contained at their last visit. This
      * information is used as a logical timestamp to check whether the separator is still guaranteed to be optimal or if
      * it needs to be recomputed.
      */
diff --git a/algorithms/active/sparse/src/test/java/sparse/it/SparseIT.java b/algorithms/active/sparse/src/test/java/de/learnlib/algorithm/sparse/it/SparseIT.java
similarity index 97%
rename from algorithms/active/sparse/src/test/java/sparse/it/SparseIT.java
rename to algorithms/active/sparse/src/test/java/de/learnlib/algorithm/sparse/it/SparseIT.java
index c708097a3..82129ba06 100644
--- a/algorithms/active/sparse/src/test/java/sparse/it/SparseIT.java
+++ b/algorithms/active/sparse/src/test/java/de/learnlib/algorithm/sparse/it/SparseIT.java
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package sparse.it;
+package de.learnlib.algorithm.sparse.it;
 
 import de.learnlib.algorithm.sparse.SparseLearner;
 import de.learnlib.oracle.MembershipOracle.MealyMembershipOracle;

From e302102063d8352bf75343a2b9e67a1f0a9e2507 Mon Sep 17 00:00:00 2001
From: Markus Frohme 
Date: Sun, 21 Sep 2025 00:44:57 +0200
Subject: [PATCH 10/15] more cleanups

---
 .../sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
index 90e41332f..fdd91a9ad 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
@@ -73,7 +73,7 @@ void update(List> cRows, int numSufs) {
         }
 
         /*
-         * since suffixes and core rows grow monotonically, the separator only needs to be recomputed whenever new
+         * Since suffixes and core rows grow monotonically, the separator only needs to be recomputed whenever new
          * compatible core prefixes emerge or when the suffix set grows.
          */
 

From fb16e8d84eb89beb31eaea1b54bfbef1d5e18517 Mon Sep 17 00:00:00 2001
From: Markus Frohme 
Date: Mon, 22 Sep 2025 17:02:29 +0200
Subject: [PATCH 11/15] update README

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index dd2610ed6..66f6bb6d2 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,7 @@ Currently, the following learning algorithms with respective target models are s
 | DHC                 | `Mealy`                     |     |                       |                       |
 | Kearns & Vazirani   | `DFA` `Mealy`               |     |                       |                       |
 | Lambda              | `DFA` `Mealy`               |     |                       |                       |
+| L<sup>s</sup>       | `Mealy`                     |     |                       |                       |
 | L#                  | `Mealy`                     |     |                       |                       |
 | L* (incl. variants) | `DFA` `Mealy` `Moore`       |     |                       |                       |
 | NL*                 | `NFA`                       |     |                       |                       |

From 34d46516162ce49622af3a4e09fd392a40336533 Mon Sep 17 00:00:00 2001
From: Markus Frohme 
Date: Mon, 29 Sep 2025 14:32:47 +0200
Subject: [PATCH 12/15] update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1a3d68f22..9cf2b715a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
+* Added the L<sup>s</sup> active learning algorithm (thanks to [Wolffhardt Schwabe](https://github.com/stateMachinist)).
 * Added an `EarlyExitEQOracle` which for a given `AdaptiveMembershipOracle` and `TestWordGenerator` stops the evaluation of (potentially long) Mealy-based equivalence tests as soon as a mismatch with the hypothesis is detected, potentially improving the symbol performance of the given equivalence oracle.
 
 ### Changed

From 9908fafd939b818e8f465c72db2508a9e84c84e8 Mon Sep 17 00:00:00 2001
From: Markus Frohme 
Date: Sat, 4 Oct 2025 20:35:52 +0200
Subject: [PATCH 13/15] add DOIs

---
 algorithms/active/sparse/pom.xml                             | 4 ++--
 .../java/de/learnlib/algorithm/sparse/SparseLearner.java     | 5 +++--
 algorithms/active/sparse/src/main/java/module-info.java      | 4 ++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/algorithms/active/sparse/pom.xml b/algorithms/active/sparse/pom.xml
index 95294f065..65f3616e2 100644
--- a/algorithms/active/sparse/pom.xml
+++ b/algorithms/active/sparse/pom.xml
@@ -30,8 +30,8 @@ limitations under the License.
     LearnLib :: Algorithms :: Sparse
     
         This artifact provides the implementation of the Sparse OT learning algorithm as described in the paper
-        "Learning Mealy Machines with Sparse Observation Tables" (TODO) by Wolffhardt Schwabe, Paul Kogel, and Sabine
-        Glesner.
+        "Learning Mealy Machines with Sparse Observation Tables" (https://doi.org/10.1007/978-3-032-05792-1_10) by
+        Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
     
 
     
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java
index 41873ea06..386713824 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java
@@ -24,8 +24,9 @@
 import net.automatalib.word.Word;
 
 /**
- * Optimized implementation of the Ls learning algorithm, as described in the paper Learning Mealy
- * Machines with Sparse Observation Tables by Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
+ * Optimized implementation of the Ls learning algorithm, as described in the paper Learning Mealy Machines with Sparse Observation Tables by
+ * Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
  */
 public class SparseLearner extends GenericSparseLearner {
 
diff --git a/algorithms/active/sparse/src/main/java/module-info.java b/algorithms/active/sparse/src/main/java/module-info.java
index 4b3c08d1a..f2ef0c7ea 100644
--- a/algorithms/active/sparse/src/main/java/module-info.java
+++ b/algorithms/active/sparse/src/main/java/module-info.java
@@ -16,8 +16,8 @@
 
 /**
  * This module provides the implementation of the Sparse OT learning algorithm as described in the paper Learning Mealy Machines with Sparse Observation Tables by Wolffhardt Schwabe, Paul Kogel, and Sabine
- * Glesner.
+ * href="https://doi.org/10.1007/978-3-032-05792-1_10">Learning Mealy Machines with Sparse Observation Tables by
+ * Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
  * <p>
  * This module is provided by the following Maven dependency:
  * <pre>


From 72524f6026730d194b2aec4dee00ade1f32ad9f1 Mon Sep 17 00:00:00 2001
From: stateMachinist <190835587+stateMachinist@users.noreply.github.com>
Date: Thu, 9 Oct 2025 22:01:23 +0200
Subject: [PATCH 14/15] reverted some refactorings and extended comments

---
 .../de/learnlib/algorithm/sparse/CoreRow.java | 14 +++-
 .../learnlib/algorithm/sparse/FringeRow.java  | 11 ++-
 .../sparse/GenericSparseLearner.java          | 80 +++++++++----------
 .../de/learnlib/algorithm/sparse/Leaf.java    | 13 ++-
 .../de/learnlib/algorithm/sparse/Node.java    |  7 +-
 .../algorithm/sparse/SparseLearner.java       |  6 +-
 .../sparse/src/main/java/module-info.java     |  6 +-
 7 files changed, 73 insertions(+), 64 deletions(-)

diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java
index 211ea8ba7..1dd6437ff 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java
@@ -22,22 +22,28 @@
 
 import net.automatalib.word.Word;
 
+/**
+ * Each core row represents some hypothesis state and stores its outputs for all table suffixes.
+ */
 class CoreRow extends Row {
 
     /**
      * Hypothesis state associated with this row.
      */
     final S state;
+
     /**
-     * Index in core row list.
+     * Index of this row in the core row list.
      */
     final int idx;
+
     /**
-     * Maps suffixes to the outputs contained in this row.
+     * Maps suffixes to their outputs.
      */
     final Map, Word> sufToOut;
+
     /**
-     * Also store identifiers of suffix-output pairs for fast compatibility checking.
+     * Identifiers of all suffix-output pairs in this row, used for fast compatibility checking.
      */
     final Set cellIds;
 
@@ -46,7 +52,7 @@ class CoreRow extends Row {
         this.state = state;
         this.idx = idx;
         sufToOut = new HashMap<>();
-        cellIds = new HashSet<>();
+        cellIds = new HashSet<>(); // use HashSet to enable fast containment checks
     }
 
     void addSuffix(Word suf, Word out, int cell) {
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java
index d91994a7c..c613a6c73 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java
@@ -18,7 +18,8 @@
 import net.automatalib.word.Word;
 
 /**
- * Each fringe row represents a hypothesis transition.
+ * Each fringe row represents some hypothesis transition
+ * outside the spanning tree defined by the core prefixes.
  *
  * @param 
  *         state type
@@ -33,17 +34,21 @@ class FringeRow extends Row {
      * Source state.
      */
     final S srcState;
+
     /**
      * Input symbol.
      */
     final I transIn;
+
     /**
      * Output symbol (determined dynamically).
      */
     O transOut;
+
     /**
-     * For compression, fringe rows do not store observations directly. Instead, they point to some leaf in a tree
-     * encoding their classification history. This trick avoids redundantly storing identical observations.
+     * For compression, fringe rows do not store observations directly.
+     * Instead, they point to some leaf in a tree encoding their classification history.
+     * This trick avoids redundantly storing identical observations.
      */
     Leaf leaf;
 
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java
index 7eb094c6d..9f15bc463 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java
@@ -21,11 +21,10 @@
 import java.util.Collections;
 import java.util.Deque;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.function.Function;
+import java.util.stream.Collectors;
 
 import de.learnlib.algorithm.LearningAlgorithm.MealyLearner;
 import de.learnlib.counterexample.LocalSuffixFinders;
@@ -35,7 +34,6 @@
 import net.automatalib.alphabet.Alphabet;
 import net.automatalib.automaton.transducer.MealyMachine;
 import net.automatalib.automaton.transducer.MutableMealyMachine;
-import net.automatalib.common.util.HashUtil;
 import net.automatalib.common.util.Pair;
 import net.automatalib.word.Word;
 
@@ -43,49 +41,59 @@ class GenericSparseLearner implements MealyLearner {
 
     private final Alphabet alphabet;
     private final MealyMembershipOracle oracle;
+
     /**
      * Suffixes.
      */
     private final Deque> sufs;
+
     /**
      * Core rows.
      */
     private final List> cRows;
+
     /**
      * Fringe rows.
      */
     private final Deque> fRows;
+
     /**
-     * Fringe prefix to row.
+     * Maps fringe prefixes to rows.
      */
     private final Map, FringeRow> prefToFringe;
+
     /**
-     * List of unique cells.
+     * List of unique suffix-output cells.
      */
     private final List, Word>> cells;
+
     /**
-     * Maps each unique cell to its list index.
+     * Maps each suffix-output cell to its list index.
      */
     private final Map, Word>, Integer> cellToIdx;
+
     /**
      * Hypothesis.
      */
     private final MutableMealyMachine hyp;
+
     /**
      * Maps each state to its core row prefix.
      */
     private final Map> stateToPrefix;
+
     /**
-     * Access sequences.
+     * Computes access sequences.
      */
     private final Function, Word> accSeq;
 
     /**
-     * For fast suffix ranking, we track for each suffix how the core rows are partitioned by it.
+     * For fast suffix ranking, this map stores the core row partitions created by each suffix.
      */
     private final Map, List> sufToVecs;
+
     /**
-     * See {@link #sufToVecs}.
+     * Helper map for efficiently constructing the suffix partition map (see {@link #sufToVecs}).
      */
     private final Map, Map, Integer>> sufToOutToIdx;
 
@@ -122,7 +130,8 @@ public void startLearning() {
         stateToPrefix.put(init, c.prefix);
         extendFringe(c, init, new Leaf<>(c, 1, sufs.size(), Collections.emptyList()));
         fRows.forEach(f -> query(f, Word.epsilon())); // query transition outputs
-        // initially, transition outputs must be queried manually for later transitions, they derive from suffix queries
+        // initially, transition outputs must be queried manually;
+        // for later transitions, they derive from suffix queries
         updateHypothesis();
     }
 
@@ -188,7 +197,8 @@ private Word pickSuffix(BitSet remRows) {
 
             assert sumOccur == remRows.cardinality();
             if (maxOccur < bestRank) {
-                // among equally ranked suffixes, pick youngest (mind that suffixes are stored/iterated LIFO)
+                // among equally ranked suffixes, pick youngest
+                // (mind that suffixes are stored/iterated LIFO)
                 bestSuf = s;
                 bestRank = maxOccur;
                 if (bestRank == 1) { // optimization: no better suffix is possible
@@ -219,15 +229,11 @@ private void followNode(FringeRow f, Node n) {
         }
 
         final BitSet remRows = new BitSet();
-        for (int i = sep.remRows.nextSetBit(0); i >= 0; i = sep.remRows.nextSetBit(i + 1)) {
-            if (cRows.get(i).cellIds.contains(cellIdx)) {
-                remRows.set(i);
-            }
-        }
-        final List cellIds = new ArrayList<>(sep.cellsIds.size() + 1);
-        cellIds.addAll(sep.cellsIds);
+        sep.remRows.stream().filter(i -> cRows.get(i).cellIds.contains(cellIdx)).forEach(remRows::set);
+        final List cellIds = new ArrayList<>(sep.cellsIds); // important: copy elements!
         cellIds.add(cellIdx);
-        if (remRows.isEmpty()) { // no compatible core prefix
+        if (remRows.isEmpty()) {
+            // no compatible core prefix
             f.leaf = null;
             moveToCore(f, cellIds);
         } else if (remRows.cardinality() == 1) {
@@ -252,7 +258,8 @@ private Word query(Row r, Word suf) {
     }
 
     /**
-     * Adds suffix-output pair to index if not yet contained and returns a unique identifier representing the pair.
+     * Adds suffix-output pair to index if not yet contained,
+     * and returns a unique identifier representing the pair.
      */
     private int getUniqueCellIdx(Word suf, Word out) {
         assert suf.length() == out.length();
@@ -270,7 +277,7 @@ private int getUniqueCellIdx(Word suf, Word out) {
      * Returns index of new core row.
      */
     private int moveToCore(FringeRow f, List cellIds) {
-        boolean removed = fRows.remove(f);
+        final boolean removed = fRows.remove(f);
         assert removed;
         final S state = hyp.addState();
         final CoreRow c = new CoreRow<>(f.prefix, state, cRows.size());
@@ -290,25 +297,14 @@ private int moveToCore(FringeRow f, List cellIds) {
     }
 
     /**
-     * Takes fringe row and its observations, queries the missing entries, and returns a list containing the
-     * observations for all suffixes.
+     * Takes fringe row and its observations, queries the missing entries,
+     * and returns a list containing the observations for all suffixes.
      */
     private List completeRowObservations(FringeRow f, List cellIds) {
-        final Set> sufsPresent = new HashSet<>(HashUtil.capacity(cellIds.size()));
-        for (Integer id : cellIds) {
-            sufsPresent.add(this.cells.get(id).getFirst());
-        }
-        final List> sufsMissing = new ArrayList<>(sufs.size());
-        for (Word s : sufs) {
-            if (!sufsPresent.contains(s)) {
-                sufsMissing.add(s);
-            }
-        }
-        final List cellIdsFull = new ArrayList<>(cellIds.size() + sufsMissing.size());
-        cellIdsFull.addAll(cellIds);
-        for (Word s : sufsMissing) {
-            cellIdsFull.add(getUniqueCellIdx(s, query(f, s)));
-        }
+        final List> sufsPresent = cellIds.stream().map(c -> this.cells.get(c).getFirst()).collect(Collectors.toList());
+        final List> sufsMissing = sufs.stream().filter(s -> !sufsPresent.contains(s)).collect(Collectors.toList());
+        final List cellIdsFull = new ArrayList<>(cellIds); // important: copy elements!
+        sufsMissing.forEach(s -> cellIdsFull.add(getUniqueCellIdx(s, query(f, s))));
         return cellIdsFull;
     }
 
@@ -340,11 +336,9 @@ private void identifyNewState(DefaultQuery> q) {
     private void addSuffixToTable(Word suf) {
         assert !sufs.contains(suf);
         sufs.push(suf);
-        /*
-         * This might be an extension of an existing suffix -> storing/iterating suffixes in LIFO order exploits caching
-         * when filling core rows. Similarly, since core rows are prefix-closed, cache hit rate for adding suffixes is
-         * maximized by iterating core rows in LIFO order
-         */
+
+        // since core rows are prefix-closed,
+        // cache hit rate is maximized by LIFO iteration
         for (int i = cRows.size() - 1; i >= 0; i--) {
             addSuffixToCoreRow(cRows.get(i), suf);
         }
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
index fdd91a9ad..44739899c 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
@@ -28,9 +28,9 @@ class Leaf extends Node {
     private int lastNumSufs;
 
     /**
-     * Split leafs always remember how many core rows and suffixes the table contained at their last visit. This
-     * information is used as a logical timestamp to check whether the separator is still guaranteed to be optimal or if
-     * it needs to be recomputed.
+     * Split leafs always remember how many core rows and suffixes the table contained
+     * at their last visit. This information is used as a logical timestamp to check
+     * if the separator is still guaranteed to be optimal or if it needs to be recomputed.
      */
     @Nullable Separator sep;
 
@@ -72,10 +72,9 @@ void update(List> cRows, int numSufs) {
             sep = null;
         }
 
-        /*
-         * Since suffixes and core rows grow monotonically, the separator only needs to be recomputed whenever new
-         * compatible core prefixes emerge or when the suffix set grows.
-         */
+        // Since suffixes and core rows grow monotonically,
+        // the separator only needs to be recomputed whenever
+        // new compatible core prefixes emerge or the suffix set grows.
 
         for (int i = lastNumCRows; i < cRows.size(); i++) {
             final CoreRow c = cRows.get(i);
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java
index c7f18039b..e28574e3e 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java
@@ -21,9 +21,14 @@
 class Node { // type parameters required for safe casting
 
     /**
-     * Cell identifiers of the fringe rows at this node.
+     * Suffix-output cell identifiers of the fringe rows that share this node.
      */
     final List cellsIds;
+
+    /**
+     * Bit vector indicating the core rows that remain compatible
+     * with the observations associated with this node.
+     */
     final BitSet remRows;
 
     protected Node(List cellsIds) {
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java
index 386713824..ec500d666 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/SparseLearner.java
@@ -24,9 +24,9 @@
 import net.automatalib.word.Word;
 
 /**
- * Optimized implementation of the Ls learning algorithm, as described in the paper Learning Mealy Machines with Sparse Observation Tables by
- * Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
+ * Optimized implementation of the L<sup>s</sup> learning algorithm, as described in the paper
+ * <a href="https://doi.org/10.1007/978-3-032-05792-1_10">Learning Mealy Machines with Sparse Observation Tables</a>
+ * by Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
  */
 public class SparseLearner extends GenericSparseLearner {
 
diff --git a/algorithms/active/sparse/src/main/java/module-info.java b/algorithms/active/sparse/src/main/java/module-info.java
index f2ef0c7ea..308fe8fe7 100644
--- a/algorithms/active/sparse/src/main/java/module-info.java
+++ b/algorithms/active/sparse/src/main/java/module-info.java
@@ -15,9 +15,9 @@
  */
 
 /**
- * This module provides the implementation of the Sparse OT learning algorithm as described in the paper Learning Mealy Machines with Sparse Observation Tables by
- * Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
+ * This module provides the implementation of the Sparse OT learning algorithm as described in the paper
+ * <a href="https://doi.org/10.1007/978-3-032-05792-1_10">Learning Mealy Machines with Sparse Observation Tables</a>
+ * by Wolffhardt Schwabe, Paul Kogel, and Sabine Glesner.
  * <p>
  * This module is provided by the following Maven dependency:
  * <pre>


From 8d868a74add855bcc5dc3109e7a734d0e3d0bcdf Mon Sep 17 00:00:00 2001
From: stateMachinist <190835587+stateMachinist@users.noreply.github.com>
Date: Fri, 10 Oct 2025 16:46:02 +0200
Subject: [PATCH 15/15] improved documentation

---
 .../de/learnlib/algorithm/sparse/CoreRow.java |  9 ++-
 .../learnlib/algorithm/sparse/FringeRow.java  |  2 +-
 .../sparse/GenericSparseLearner.java          | 25 ++++---
 .../de/learnlib/algorithm/sparse/Leaf.java    | 70 ++++++++++++-------
 .../de/learnlib/algorithm/sparse/Node.java    | 24 ++++---
 .../learnlib/algorithm/sparse/Separator.java  | 10 ++-
 6 files changed, 89 insertions(+), 51 deletions(-)

diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java
index 1dd6437ff..cbd32de1a 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/CoreRow.java
@@ -23,7 +23,8 @@
 import net.automatalib.word.Word;
 
 /**
- * Each core row represents some hypothesis state and stores its outputs for all table suffixes.
+ * Each core row represents some hypothesis state
+ * and stores its outputs for all table suffixes.
  */
 class CoreRow extends Row {
 
@@ -34,6 +35,7 @@ class CoreRow extends Row {
 
     /**
      * Index of this row in the core row list.
+     * Used as a unique address.
      */
     final int idx;
 
@@ -43,7 +45,8 @@ class CoreRow extends Row {
     final Map, Word> sufToOut;
 
     /**
-     * Identifiers of all suffix-output pairs in this row, used for fast compatibility checking.
+     * Identifiers of all suffix-output pairs in this row,
+     * used for fast compatibility checking.
      */
     final Set cellIds;
 
@@ -52,7 +55,7 @@ class CoreRow extends Row {
         this.state = state;
         this.idx = idx;
         sufToOut = new HashMap<>();
-        cellIds = new HashSet<>(); // use HashSet to enable fast containment checks
+        cellIds = new HashSet<>(); // use HashSet for fast containment checks
     }
 
     void addSuffix(Word suf, Word out, int cell) {
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java
index c613a6c73..dcf88e56e 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/FringeRow.java
@@ -41,7 +41,7 @@ class FringeRow extends Row {
     final I transIn;
 
     /**
-     * Output symbol (determined dynamically).
+     * Output symbol (determined lazily).
      */
     O transOut;
 
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java
index 9f15bc463..f9c805ddf 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/GenericSparseLearner.java
@@ -48,12 +48,12 @@ class GenericSparseLearner implements MealyLearner {
     private final Deque> sufs;
 
     /**
-     * Core rows.
+     * List of core rows. Rows can be addressed by their index in this list.
      */
     private final List> cRows;
 
     /**
-     * Fringe rows.
+     * Fringe rows (stored in a stack for LIFO iteration).
      */
     private final Deque> fRows;
 
@@ -63,12 +63,13 @@ class GenericSparseLearner implements MealyLearner {
     private final Map, FringeRow> prefToFringe;
 
     /**
-     * List of unique suffix-output cells.
+     * List of unique suffix-output pairs, addressable by index.
+     * Used for table compression: table entries hold only the cell index.
      */
     private final List, Word>> cells;
 
     /**
-     * Maps each suffix-output cell to its list index.
+     * Maps each suffix-output pair to its index (see {@link #cells}).
      */
     private final Map, Word>, Integer> cellToIdx;
 
@@ -159,13 +160,14 @@ private void updateHypothesis() {
                 return;
             } else {
                 assert f.transOut != null;
+                assert f.leaf.cRow != null;
                 hyp.setTransition(f.srcState, f.transIn, f.leaf.cRow.state, f.transOut);
             }
         }
     }
 
     private void classifyFringePrefix(FringeRow f) {
-        f.leaf.update(cRows, sufs.size());
+        f.leaf.update(cRows);
         if (f.leaf.isUnsplit()) {
             return;
         }
@@ -174,7 +176,7 @@ private void classifyFringePrefix(FringeRow f) {
         if (sep != null) {
             followNode(f, sep);
         } else {
-            f.leaf.sep = new Separator<>(pickSuffix(f.leaf.remRows), f.leaf.remRows, f.leaf.cellsIds);
+            f.leaf.sep = new Separator<>(pickSuffix(f.leaf.remRows), f.leaf.remRows, f.leaf.cellIds);
             followNode(f, f.leaf.sep);
         }
     }
@@ -212,7 +214,7 @@ private Word pickSuffix(BitSet remRows) {
     }
 
     private void followNode(FringeRow f, Node n) {
-        if (n instanceof Leaf) {
+        if (n instanceof Leaf) { // TODO simplify when switching to newer java
             final Leaf l = (Leaf) n;
             assert l.isUnsplit();
             f.leaf = l;
@@ -230,7 +232,7 @@ private void followNode(FringeRow f, Node n) {
 
         final BitSet remRows = new BitSet();
         sep.remRows.stream().filter(i -> cRows.get(i).cellIds.contains(cellIdx)).forEach(remRows::set);
-        final List cellIds = new ArrayList<>(sep.cellsIds); // important: copy elements!
+        final List cellIds = new ArrayList<>(sep.cellIds); // important: copy elements!
         cellIds.add(cellIdx);
         if (remRows.isEmpty()) {
             // no compatible core prefix
@@ -249,7 +251,7 @@ private void followNode(FringeRow f, Node n) {
 
     private Word query(Row r, Word suf) {
         final Word out = oracle.answerQuery(r.prefix.concat(suf));
-        if (r instanceof FringeRow) {
+        if (r instanceof FringeRow) { // TODO simplify when switching to newer java
             final FringeRow f = (FringeRow) r;
             f.transOut = out.prefix(f.prefix.length()).lastSymbol();
         }
@@ -301,6 +303,7 @@ private int moveToCore(FringeRow f, List cellIds) {
      * and returns a list containing the observations for all suffixes.
      */
     private List completeRowObservations(FringeRow f, List cellIds) {
+        // TODO simplify collector calls when switching to newer java
         final List> sufsPresent = cellIds.stream().map(c -> this.cells.get(c).getFirst()).collect(Collectors.toList());
         final List> sufsMissing = sufs.stream().filter(s -> !sufsPresent.contains(s)).collect(Collectors.toList());
         final List cellIdsFull = new ArrayList<>(cellIds); // important: copy elements!
@@ -325,8 +328,8 @@ private void identifyNewState(DefaultQuery> q) {
         final Word u = accSeq.apply(cex.prefix(idxSym));
         final Word ui = u.append(cex.getSymbol(idxSym));
         final FringeRow f = prefToFringe.get(ui);
-        assert f.leaf.isUnsplit();
-        final int cRowIdx = moveToCore(f, f.leaf.cellsIds);
+        final int cRowIdx = moveToCore(f, f.leaf.cellIds);
+        assert f.leaf.cRow != null;
         if (f.leaf.cRow.cellIds.containsAll(cRows.get(cRowIdx).cellIds)) {
             // only add new suffix if the row is not yet distinguished
             addSuffixToTable(cex.subWord(idxSuf));
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
index 44739899c..8ccfba23d 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Leaf.java
@@ -20,67 +20,85 @@
 
 import org.checkerframework.checker.nullness.qual.Nullable;
 
+/**
+ * Leaves can be split or unsplit.
+ * An unsplit leaf has a single compatible core row to which it points.
+ * As new core rows emerge, the observations of the leaf may no longer suffice
+ * to assign it to a unique core row. The leaf then becomes split.
+ * Split leaves cache suffix selection by reference to separators.
+ * Leaves remember how many core rows and suffixes existed at their last visit.
+ * This information is used as a logical timestamp to check
+ * if the separator is still known to be optimal
+ * or if it needs to be recomputed.
+ */
 class Leaf extends Node {
 
-    final @Nullable CoreRow cRow;
-    private boolean split;
-    private int lastNumCRows;
-    private int lastNumSufs;
+    /**
+     * Core row associated with this leaf (null if split, see {@link Leaf}).
+     */
+    @Nullable CoreRow cRow;
 
     /**
-     * Split leafs always remember how many core rows and suffixes the table contained
-     * at their last visit. This information is used as a logical timestamp to check
-     * if the separator is still guaranteed to be optimal or if it needs to be recomputed.
+     * Separator cached by this leaf (see {@link Leaf}).
      */
     @Nullable Separator sep;
 
+    private int lastNumCRows;
+    private int lastNumSufs;
+
+    private Leaf(int numCRows, int numSufs, List cellIds) {
+        super(cellIds);
+        this.lastNumCRows = numCRows;
+        this.lastNumSufs = numSufs;
+    }
+
     /**
-     * Creates split leaf without observations.
+     * Creates a split leaf without observations (see {@link Leaf}).
      */
     Leaf() {
-        super(Collections.emptyList());
-        cRow = null;
-        split = true;
-        lastNumCRows = 0;
-        lastNumSufs = 0;
+        this(0, 0, Collections.emptyList());
         // timestamps will be updated automatically
+        cRow = null;
     }
 
     /**
-     * Creates unsplit leaf associated with the given core row and observations.
+     * Creates an unsplit leaf associated with the given core row and observations
+     * (see {@link Leaf}).
      */
     Leaf(CoreRow cRow, int numCRows, int numSufs, List cellIds) {
-        super(cellIds);
+        this(numCRows, numSufs, cellIds);
         this.cRow = cRow;
         remRows.set(cRow.idx);
-        split = false;
-        lastNumCRows = numCRows;
-        lastNumSufs = numSufs;
     }
 
+    /**
+     * Returns whether this leaf currently points to a core row, i.e., is unsplit (see {@link Leaf}).
+     */
     boolean isUnsplit() {
-        return !split;
+        return cRow != null;
     }
 
-    void update(List> cRows, int numSufs) {
+    void update(List> cRows) {
         assert lastNumCRows <= cRows.size();
         if (lastNumCRows == cRows.size()) {
-            assert lastNumSufs == numSufs;
             return;
-        } else if (numSufs > lastNumSufs) {
+        }
+
+        final int numSufs = cRows.get(0).sufToOut.size();
+        if (numSufs > lastNumSufs) {
             lastNumSufs = numSufs;
             sep = null;
         }
 
-        // Since suffixes and core rows grow monotonically,
+        // since suffixes and core rows grow monotonically,
         // the separator only needs to be recomputed whenever
-        // new compatible core prefixes emerge or the suffix set grows.
+        // new compatible core prefixes emerge or the suffix set grows
 
         for (int i = lastNumCRows; i < cRows.size(); i++) {
             final CoreRow c = cRows.get(i);
-            if (c.cellIds.containsAll(cellsIds)) {
+            if (c.cellIds.containsAll(cellIds)) {
                 remRows.set(c.idx);
-                split = true;
+                cRow = null; // split leaf
                 sep = null;
             }
         }
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java
index e28574e3e..12ac68ba1 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Node.java
@@ -18,25 +18,33 @@
 import java.util.BitSet;
 import java.util.List;
 
+/**
+ * To compress the table and to cache suffix selection,
+ * fringe rows do not store observations, but instead map to some node.
+ * Each node is associated with a set of suffix-output pairs,
+ * potentially representing multiple rows with identical observations.
+ * Nodes are either leaves or separators.
+ */
 class Node { // type parameters required for safe casting
 
     /**
-     * Suffix-output cell identifiers of the fringe rows that share this node.
+     * Identifiers of suffix-output pairs associated with this node.
      */
-    final List cellsIds;
+    final List cellIds;
 
     /**
-     * Bit vector indicating the core rows that remain compatible
-     * with the observations associated with this node.
+     * Bit vector encoding which core rows remain compatible
+     * with the observations at this node.
+     * Rows are represented by their index.
      */
     final BitSet remRows;
 
-    protected Node(List cellsIds) {
-        this(cellsIds, new BitSet());
+    protected Node(List cellIds) {
+        this(cellIds, new BitSet());
     }
 
-    protected Node(List cellsIds, BitSet remRows) {
-        this.cellsIds = cellsIds;
+    protected Node(List cellIds, BitSet remRows) {
+        this.cellIds = cellIds;
         this.remRows = remRows;
     }
 }
diff --git a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java
index 61a5f2705..d2a231f24 100644
--- a/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java
+++ b/algorithms/active/sparse/src/main/java/de/learnlib/algorithm/sparse/Separator.java
@@ -22,12 +22,18 @@
 
 import net.automatalib.word.Word;
 
+/**
+ * Separators guide the classification of fringe prefixes.
+ * A separator holds a suffix and a branch map.
+ * The branch map selects the next node
+ * based on the output produced by the suffix.
+ */
 class Separator extends Node {
     final Word suffix;
     final Map, Node> branchMap;
 
-    Separator(Word suffix, BitSet remRows, List cells) {
-        super(cells, remRows);
+    Separator(Word suffix, BitSet remRows, List cellIds) {
+        super(cellIds, remRows);
         this.suffix = suffix;
         branchMap = new HashMap<>();
     }