Skip to content

Commit

Permalink
[SYSTEMDS-43] New scale builtin function
Browse files Browse the repository at this point in the history
Added code to install outliers R package.

Adds the scale builtin function, which centers and/or scales (computes the z-score of) the columns of an input matrix, together with the corresponding test routines.

Closes #4, closes #5.
  • Loading branch information
phaniarnab authored and mboehm7 committed Apr 25, 2019
1 parent 935872f commit 7506ed8
Show file tree
Hide file tree
Showing 6 changed files with 257 additions and 1 deletion.
41 changes: 41 additions & 0 deletions scripts/builtin/scale.dml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#-------------------------------------------------------------
#
# Copyright 2019 Graz University of Technology
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#-------------------------------------------------------------

m_scale = function(Matrix[Double] X, Boolean center, Boolean scale) return (Matrix[Double] Y) {
  # Column-wise centering and/or scaling of the input matrix X.
  #
  #   X       input matrix
  #   center  if TRUE, the column mean is subtracted from every column
  #   scale   if TRUE, every column is divided by the square root of its
  #           sum of squares over (nrow(X) - 1); combined with centering
  #           this yields the z-score
  #   Y       the centered and/or scaled matrix (X unchanged if both flags are FALSE)

  Y = X;

  # Step 1: shift every column by its mean.
  if (center) {
    Y = Y - colMeans(Y);
  }

  # Step 2: divide every column by its scaling factor.
  if (scale) {
    n = nrow(Y);
    sumSq = colSums(Y^2);
    if (center) {
      # The means are recomputed on the already-centered data and n*mean^2 is
      # subtracted from the sum of squares; presumably this corrects for the
      # rounding residue left by the centering step (TODO confirm intent).
      mu = colMeans(Y);
      colVar = (sumSq - n*(mu^2)) / (n-1);
    }
    else {
      # Without centering the divisor is based on the raw sum of squares.
      colVar = sumSq / (n-1);
    }

    Y = Y / sqrt(colVar);
    # NaNs (e.g., 0/0 for all-zero columns) are mapped to 0.
    Y = replace(target=Y, pattern=NaN, replacement=0);
  }
}
1 change: 1 addition & 0 deletions src/main/java/org/tugraz/sysds/common/Builtins.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ public enum Builtins {
QEXP("qexp", false, true),
REPLACE("replace", false, true),
RMEMPTY("removeEmpty", false, true),
SCALE("scale", true, false), //TODO parameterize center & scale
TOSTRING("toString", false, true),
TRANSFORMAPPLY("transformapply", false, true),
TRANSFORMCOLMAP("transformcolmap", false, true),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/*
* Copyright 2019 Graz University of Technology
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.tugraz.sysds.test.functions.builtin;

import java.util.HashMap;

import org.junit.Test;
import org.tugraz.sysds.common.Types.ExecMode;
import org.tugraz.sysds.lops.LopProperties.ExecType;
import org.tugraz.sysds.runtime.matrix.data.MatrixValue.CellIndex;
import org.tugraz.sysds.test.AutomatedTestBase;
import org.tugraz.sysds.test.TestConfiguration;
import org.tugraz.sysds.test.TestUtils;

public class BuiltinScaleTest extends AutomatedTestBase
{
private final static String TEST_NAME = "Scale";
private final static String TEST_DIR = "functions/builtin/";
private final static String TEST_CLASS_DIR = TEST_DIR + BuiltinScaleTest.class.getSimpleName() + "/";

private final static double eps = 1e-8;
private final static int rows = 1765;
private final static int cols = 392;
private final static double spSparse = 0.7;
private final static double spDense = 0.1;

@Override
public void setUp() {
addTestConfiguration(TEST_NAME,new TestConfiguration(TEST_CLASS_DIR, TEST_NAME,new String[]{"B"}));
}

@Test
public void testScaleDenseNegNegCP() {
runScaleTest(false, false, false, ExecType.CP);
}

@Test
public void testScaleDenseNegPosCP() {
runScaleTest(false, false, true, ExecType.CP);
}

@Test
public void testScaleDensePosNegCP() {
runScaleTest(false, true, false, ExecType.CP);
}

@Test
public void testScaleDensePosPosCP() {
runScaleTest(false, true, true, ExecType.CP);
}

@Test
public void testScaleDenseNegNegSP() {
runScaleTest(false, false, false, ExecType.SPARK);
}

@Test
public void testScaleDenseNegPosSP() {
runScaleTest(false, false, true, ExecType.SPARK);
}

@Test
public void testScaleDensePosNegSP() {
runScaleTest(false, true, false, ExecType.SPARK);
}

@Test
public void testScaleDensePosPosSP() {
runScaleTest(false, true, true, ExecType.SPARK);
}

@Test
public void testScaleSparseNegNegCP() {
runScaleTest(true, false, false, ExecType.CP);
}

@Test
public void testScaleSparseNegPosCP() {
runScaleTest(true, false, true, ExecType.CP);
}

@Test
public void testScaleSparsePosNegCP() {
runScaleTest(true, true, false, ExecType.CP);
}

@Test
public void testScaleSparsePosPosCP() {
runScaleTest(true, true, true, ExecType.CP);
}

@Test
public void testScaleSparseNegNegSP() {
runScaleTest(true, false, false, ExecType.SPARK);
}

@Test
public void testScaleSparseNegPosSP() {
runScaleTest(true, false, true, ExecType.SPARK);
}

@Test
public void testScaleSparsePosNegSP() {
runScaleTest(true, true, false, ExecType.SPARK);
}

@Test
public void testScaleSparsePosPosSP() {
runScaleTest(true, true, true, ExecType.SPARK);
}

private void runScaleTest(boolean sparse, boolean center, boolean scale, ExecType instType)
{
ExecMode platformOld = rtplatform;
switch( instType ) {
case SPARK: rtplatform = ExecMode.SPARK; break;
default: rtplatform = ExecMode.HYBRID; break;
}

try
{
loadTestConfiguration(getTestConfiguration(TEST_NAME));

String HOME = SCRIPT_DIR + TEST_DIR;
fullDMLScriptName = HOME + TEST_NAME + ".dml";
programArgs = new String[]{"-explain", "-args", input("A"),
String.valueOf(center).toUpperCase(), String.valueOf(scale).toUpperCase(),
output("B") };
fullRScriptName = HOME + TEST_NAME + ".R";
rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " "
+ String.valueOf(center).toUpperCase() + " " + String.valueOf(scale).toUpperCase() +
" " + expectedDir();

//generate actual dataset
double[][] A = getRandomMatrix(rows, cols, -1, 1, sparse?spSparse:spDense, 7);
writeInputMatrixWithMTD("A", A, true);

runTest(true, false, null, -1);
runRScript(true);

//compare matrices
HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("B");
HashMap<CellIndex, Double> rfile = readRMatrixFromFS("B");
TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
}
finally {
rtplatform = platformOld;
}
}

}
27 changes: 27 additions & 0 deletions src/test/scripts/functions/builtin/scale.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#-------------------------------------------------------------
#
# Copyright 2019 Graz University of Technology
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#-------------------------------------------------------------

# Reference implementation for the scale builtin test.
# Arguments: [1] input directory, [2] center flag, [3] scale flag,
#            [4] output directory.
args <- commandArgs(TRUE)
options(digits=22)
library("Matrix")

# Resolve file locations and flags from the command line.
inputPath <- paste(args[1], "A.mtx", sep="")
outputPath <- paste(args[4], "B", sep="")
doCenter <- as.logical(args[2])
doScale <- as.logical(args[3])

# Read the input, apply R's scale, and map NaNs to 0 before writing.
X <- as.matrix(readMM(inputPath))
Y <- as.matrix(scale(X, center=doCenter, scale=doScale))
Y[is.nan(Y)] <- 0
writeMM(as(Y, "CsparseMatrix"), outputPath)
21 changes: 21 additions & 0 deletions src/test/scripts/functions/builtin/scale.dml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#-------------------------------------------------------------
#
# Copyright 2019 Graz University of Technology
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#-------------------------------------------------------------

# Test driver for the scale builtin.
# $1: input matrix, $2: center flag, $3: scale flag, $4: output matrix.
A = read($1);
B = scale(A, $2, $3);
write(B, $4);
3 changes: 2 additions & 1 deletion src/test/scripts/installDependencies.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,5 @@ custom_install("plotrix");
custom_install("psych");
custom_install("moments");
custom_install("batch");
custom_install("matrixStats");
custom_install("matrixStats");
custom_install("outliers");

0 comments on commit 7506ed8

Please sign in to comment.