Skip to content

Commit

Permalink
[SYSTEMDS-3696] Extended incremental slice line (pruning selector)
Browse files Browse the repository at this point in the history
Closes #2098.
  • Loading branch information
fzoepffel authored and mboehm7 committed Sep 7, 2024
1 parent afcc5d8 commit 5283544
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 22 deletions.
40 changes: 29 additions & 11 deletions scripts/builtin/incSliceLine.dml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ m_incSliceLine = function(
Matrix[Double] prevFoffb = matrix(0,0,0), Matrix[Double] prevFoffe = matrix(0,0,0),
list[unknown] prevLattice = list(), list[unknown] metaPrevLattice = list(),
list[unknown] prevStats = list(), Matrix[Double] prevTK = matrix(0,0,0),
Matrix[Double] prevTKC = matrix(0,0,0), Boolean encodeLat = TRUE)
Matrix[Double] prevTKC = matrix(0,0,0), Boolean encodeLat = TRUE,
Int pruningStrat = 0
)
return(
Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D,
list[unknown] L, list[unknown] metaLattice,
Expand All @@ -96,6 +98,20 @@ m_incSliceLine = function(
[TK, TKC, D, L, Stats, Xout, eOut, foffb, foffe, metaLattice, params] = throwNoParamsError();
} else {

# Decode the pruningStrat selector into two independent switches:
#   0 -> neither switch is set
#   1 -> disableIncScorePruning is set
#   2 -> disableIncSizePruning is set
#   3 -> both switches are set
# Any other value leaves both switches FALSE.
disableIncScorePruning = (pruningStrat == 1) | (pruningStrat == 3);
disableIncSizePruning = (pruningStrat == 2) | (pruningStrat == 3);

t1 = time();

# store params for next run
Expand Down Expand Up @@ -176,12 +192,12 @@ m_incSliceLine = function(

# compute score for lowest scoring prevTK slice to set high min score early on to prune slices based on scores
minsc = -Inf;
if( nrow(prevTK2) > 0 ) {
if( nrow(prevTK2) > 0 & !disableIncScorePruning) {
[minsc] = computeLowestPrevTK (prevTK2, X2, totalE, eAvg, alpha, minsc)
}

# create and score basic slices (conjunctions of 1 feature)
[S, R, selCols] = createAndScoreBasicSlices(X2, changedX2, prevTK2, totalE, eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, verbose);
[S, R, selCols] = createAndScoreBasicSlices(X2, changedX2, prevTK2, totalE, eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, verbose, disableIncScorePruning);

# initialize lattice and statistics for incremental updates
Stats = list();
Expand Down Expand Up @@ -233,11 +249,14 @@ m_incSliceLine = function(
L = append(L, Lrep);

# load one hot encoded previous lattice for the current level
prevLattice2 = preparePrevLattice(prevLattice, metaPrevLattice, prevFoffb,
prevFoffe, foffb, foffe, level, encodeLat, differentOffsets)
prevLattice2 = matrix(0,0,0);
if(!disableIncSizePruning){
prevLattice2 = preparePrevLattice(prevLattice, metaPrevLattice, prevFoffb,
prevFoffe, foffb, foffe, level, encodeLat, differentOffsets)
}

if(selFeat){
if(length(prevLattice2)>0) {
if(length(prevLattice2)>0 & !disableIncSizePruning){
prevLattice2 = removeEmpty(target=prevLattice2, margin="cols", select=t(selCols));
}
S2 = removeEmpty(target=S, margin="cols", select=t(selCols));
Expand All @@ -246,9 +265,9 @@ m_incSliceLine = function(
if(verbose) {
print("\nincSliceLine: level "+level+":")
}

# prune unchanged slices with slice size < minSup
if(level <= length(prevStats)){
if(level <= length(prevStats) & !disableIncSizePruning){
[S, S2] = pruneUnchangedSlices(S, S2, prevLattice2, prevStats, changedX2, minSup, verbose, level);
}

Expand Down Expand Up @@ -309,7 +328,7 @@ m_incSliceLine = function(

createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] changedX2,
Matrix[Double] prevTK2, Matrix[Double] e,
Double eAvg, Double eAvgOld, Double eAvgNew, Double minSup, Double alpha, Double minsc, Boolean verbose)
Double eAvg, Double eAvgOld, Double eAvgNew, Double minSup, Double alpha, Double minsc, Boolean verbose, Boolean disableIncScorePruning)
return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] selCols)
{
n2 = ncol(X2);
Expand Down Expand Up @@ -361,7 +380,7 @@ createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] changedX2
R = cbind(sc, se, sm, ss);

# score pruning of basic slices based on smallest score of all slices in prevTK
if(minsc > -Inf) {
if(minsc > -Inf & !disableIncScorePruning) {
# compute upper bound scores for all basic slices
ubSc = scoreUB(ss, se, sm, eAvg, minSup, alpha, nrow(X2));
fScores = (ubSc >= minsc);
Expand Down Expand Up @@ -759,4 +778,3 @@ removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices)
remain = P2 %*% M;
while(FALSE){} #prevent inlining (TODO rewrite issue)
}

Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,16 @@ public void testTop4HybridTPSelFullFewAdded() {
runIncSliceLineTest(4, "e", false, true,2, 1, false, false, false, ExecMode.HYBRID);
}

/**
 * Same arguments as testTop4HybridTPSelFullFewAdded, but with the
 * disableScore flag set and the disableSize flag cleared.
 */
@Test
public void testTop4HybridTPSelFullFewAddedDisabledScore() {
	// name the two trailing flags so the call site is self-explanatory
	final boolean disableScore = true;
	final boolean disableSize = false;
	runIncSliceLineTest(4, "e", false, true, 2, 1, false, false, false,
		ExecMode.HYBRID, disableScore, disableSize);
}

/**
 * Same arguments as testTop4HybridTPSelFullFewAdded, but with the
 * disableSize flag set and the disableScore flag cleared.
 */
@Test
public void testTop4HybridTPSelFullFewAddedDisabledSize() {
	// name the two trailing flags so the call site is self-explanatory
	final boolean disableScore = false;
	final boolean disableSize = true;
	runIncSliceLineTest(4, "e", false, true, 2, 1, false, false, false,
		ExecMode.HYBRID, disableScore, disableSize);
}

@Test
public void testTop4HybridDPSelFullFewAddedRemoved() {
runIncSliceLineTest(4, "e", true, true,2, 1, false, true, false, ExecMode.HYBRID);
Expand Down Expand Up @@ -982,7 +992,7 @@ public void testIncSliceLineCustomInputsFull() {

};

runIncSliceLineTest(newX, e, 10, "e", false, true, 50, 1, false, false, true, ExecMode.SINGLE_NODE);
runIncSliceLineTest(newX, e, 10, "e", false, true, 50, 1, false, false, true, ExecMode.SINGLE_NODE, false, false);
}

// @Test
Expand Down Expand Up @@ -1050,16 +1060,19 @@ private void runIncSliceLineTest(int K, String err, boolean dp, boolean selCols,

private void runIncSliceLineTest(int K, String err, boolean dp, boolean selCols, int proportionOfTuplesAddedInPercent, int proportionOfTuplesRemovedInPercent, boolean onlyNullEAdded, boolean removeTuples, boolean encodeLat, ExecMode mode) {

runIncSliceLineTest(null, null, K, err, dp, selCols, proportionOfTuplesAddedInPercent, proportionOfTuplesRemovedInPercent, onlyNullEAdded, removeTuples, encodeLat, mode);
runIncSliceLineTest(null, null, K, err, dp, selCols, proportionOfTuplesAddedInPercent, proportionOfTuplesRemovedInPercent, onlyNullEAdded, removeTuples, encodeLat, mode, false, false);

}

/**
 * Overload without custom matrices that exposes the two pruning switches.
 * Passes null for both leading matrix arguments (customX, customE) of the
 * full variant and forwards every other argument unchanged.
 */
private void runIncSliceLineTest(int K, String err, boolean dp, boolean selCols,
	int proportionOfTuplesAddedInPercent, int proportionOfTuplesRemovedInPercent,
	boolean onlyNullEAdded, boolean removeTuples, boolean encodeLat, ExecMode mode,
	boolean disableScore, boolean disableSize)
{
	runIncSliceLineTest(null, null, K, err, dp, selCols,
		proportionOfTuplesAddedInPercent, proportionOfTuplesRemovedInPercent,
		onlyNullEAdded, removeTuples, encodeLat, mode, disableScore, disableSize);
}


private void runIncSliceLineTest(double[][] customX, double[][] customE,int K, String err,
boolean dp, boolean selCols, int proportionOfTuplesAddedInPercent,
int proportionOfTuplesRemovedInPercent, boolean onlyNullEAdded, boolean removeTuples,
boolean encodeLat, ExecMode mode)
{
private void runIncSliceLineTest(double[][] customX, double[][] customE,int K, String err, boolean dp, boolean selCols, int proportionOfTuplesAddedInPercent, int proportionOfTuplesRemovedInPercent, boolean onlyNullEAdded, boolean removeTuples, boolean encodeLat, ExecMode mode, boolean disableScore, boolean disableSize) {

ExecMode platformOld = setExecMode(mode);
loadTestConfiguration(getTestConfiguration(TEST_NAME2));
String HOME = SCRIPT_DIR + TEST_DIR;
Expand Down Expand Up @@ -1138,7 +1151,7 @@ private void runIncSliceLineTest(double[][] customX, double[][] customE,int K, S
fullDMLScriptName = HOME + TEST_NAME2 + ".dml";
programArgs = new String[] { "-args", input("addedX"), input("oldX"), input("oldE"), input("addedE"), String.valueOf(K),
String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), String.valueOf(encodeLat).toUpperCase(), input("indicesRemoved"),
String.valueOf(VERBOSE).toUpperCase(), output("R1"), output("R2") };
String.valueOf(VERBOSE).toUpperCase(), output("R1"), output("R2"), String.valueOf(disableScore).toUpperCase(), String.valueOf(disableSize).toUpperCase() };

runTest(true, false, null, -1);

Expand Down Expand Up @@ -1272,6 +1285,9 @@ public void testIncSliceLineCustomInputsFull(double[][] addedX, double[][] oldX,

double[][] indicesRemoved = new double[1][1];
indicesRemoved[0][0] = 0;

boolean disableScore = false;
boolean disableSize = false;


writeInputMatrixWithMTD("addedX", addedX, false);
Expand All @@ -1283,7 +1299,7 @@ public void testIncSliceLineCustomInputsFull(double[][] addedX, double[][] oldX,
fullDMLScriptName = HOME + TEST_NAME2 + ".dml";
programArgs = new String[] { "-args", input("addedX"), input("oldX"), input("oldE"), input("addedE"), String.valueOf(K),
String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), String.valueOf(encodeLat).toUpperCase(), input("indicesRemoved"),
String.valueOf(VERBOSE).toUpperCase(), output("R1"), output("R2") };
String.valueOf(VERBOSE).toUpperCase(), output("R1"), output("R2"), String.valueOf(disableScore).toUpperCase(), String.valueOf(disableSize).toUpperCase() };

runTest(true, false, null, -1);

Expand All @@ -1295,7 +1311,8 @@ public void testIncSliceLineCustomInputsFull(double[][] addedX, double[][] oldX,
TestUtils.compareMatrices(ret1, ret2, 1e-2);

Assert.assertFalse(heavyHittersContainsSubString("evalSlice"));
} finally {
}
finally {
rtplatform = platformOld;
}
}
Expand Down
15 changes: 14 additions & 1 deletion src/test/scripts/functions/builtin/incSliceLineFull.dml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ oldE = read($3);
addedE = read($4);
totalE = rbind(oldE, addedE);
indicesRemoved = read($9);
# pruning switches, read from script arguments 13 and 14
disableIncScorePruning = $13;
disableIncSizePruning = $14;

# encode both switches as the pruningStrat selector:
#   0 = nothing disabled, 1 = score pruning disabled,
#   2 = size pruning disabled, 3 = both disabled
pruningStrat = 0;
if(disableIncScorePruning){
  pruningStrat = pruningStrat + 1;
}
if(disableIncSizePruning){
  pruningStrat = pruningStrat + 2;
}


if(nrow(indicesRemoved) > 0){
if(as.scalar(indicesRemoved[1]) == 0){
Expand All @@ -40,7 +53,7 @@ if(nrow(indicesRemoved) > 0){

# second increment
[TK1, TKC1, D1, L1, meta1, Stats1, Xout1, eOut1, foffb2, foffe2, params] = incSliceLine(addedX=addedX, oldX = oldX, oldE = oldE, addedE=addedE, prevLattice = L, metaPrevLattice=meta, prevStats = Stats, prevTK = TK, prevTKC = TKC, k=$5,
alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb, prevFoffe = foffe);
alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb, prevFoffe = foffe, pruningStrat = pruningStrat);

# prepare totalX and totalE for running sliceline on total data
if(nrow(indicesRemoved) > 0){
Expand Down

0 comments on commit 5283544

Please sign in to comment.