Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML-98] [Release] Clean Service.java code #103

Merged
merged 1 commit into from
Jul 27, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
312 changes: 0 additions & 312 deletions mllib-dal/src/main/java/org/apache/spark/ml/util/Service.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,156 +33,6 @@
import java.util.ArrayList;

public class Service {
/**
 * Parses one comma-separated line of text into a slice of a {@code double} array.
 *
 * <p>The first {@code nCols} fields of {@code line} are converted with
 * {@link Double#parseDouble(String)} and written to
 * {@code data[offset]} through {@code data[offset + nCols - 1]}. Any further
 * fields on the line are ignored.
 *
 * @param line   the text line to parse; {@code null} is reported as a read failure
 * @param offset index in {@code data} that receives the first parsed value
 * @param nCols  number of fields to parse
 * @param data   destination array
 * @throws IOException if {@code line} is {@code null} or contains fewer than
 *                     {@code nCols} fields
 * @throws NumberFormatException if one of the parsed fields is not a valid number
 */
public static void readRow(String line, int offset, int nCols, double[] data) throws IOException {
    if (line == null) {
        throw new IOException("Unable to read input dataset");
    }

    String[] elements = line.split(",");
    // A short row is a malformed dataset: report it as such before touching data,
    // rather than failing with ArrayIndexOutOfBoundsException half-way through.
    if (elements.length < nCols) {
        throw new IOException("Unable to read input dataset: expected " + nCols
                + " values but found " + elements.length);
    }

    for (int j = 0; j < nCols; j++) {
        data[offset + j] = Double.parseDouble(elements[j]);
    }
}

/**
 * Parses one comma-separated line of text into a slice of a {@code long} array.
 *
 * <p>The first {@code nCols} fields of {@code line} are converted with
 * {@link Long#parseLong(String)} and written to
 * {@code data[offset]} through {@code data[offset + nCols - 1]}. Any further
 * fields on the line are ignored. Whitespace around a field is stripped before
 * parsing so that input such as {@code "1, 2, 3"} is accepted.
 *
 * @param line   the text line to parse; {@code null} is reported as a read failure
 * @param offset index in {@code data} that receives the first parsed value
 * @param nCols  number of fields to parse
 * @param data   destination array
 * @throws IOException if {@code line} is {@code null} or contains fewer than
 *                     {@code nCols} fields
 * @throws NumberFormatException if one of the parsed fields is not a valid integer
 */
public static void readRow(String line, int offset, int nCols, long[] data) throws IOException {
    if (line == null) {
        throw new IOException("Unable to read input dataset");
    }

    String[] elements = line.split(",");
    // A short row is a malformed dataset: report it as such before touching data,
    // rather than failing with ArrayIndexOutOfBoundsException half-way through.
    if (elements.length < nCols) {
        throw new IOException("Unable to read input dataset: expected " + nCols
                + " values but found " + elements.length);
    }

    for (int j = 0; j < nCols; j++) {
        // Long.parseLong, unlike Double.parseDouble, does not ignore surrounding
        // whitespace, so trim explicitly to accept the same padded input.
        data[offset + j] = Long.parseLong(elements[j].trim());
    }
}

/**
 * Parses one comma-separated line of text into a slice of a {@code float} array.
 *
 * <p>The first {@code nCols} fields of {@code line} are converted with
 * {@link Float#parseFloat(String)} and written to
 * {@code data[offset]} through {@code data[offset + nCols - 1]}. Any further
 * fields on the line are ignored.
 *
 * @param line   the text line to parse; {@code null} is reported as a read failure
 * @param offset index in {@code data} that receives the first parsed value
 * @param nCols  number of fields to parse
 * @param data   destination array
 * @throws IOException if {@code line} is {@code null} or contains fewer than
 *                     {@code nCols} fields
 * @throws NumberFormatException if one of the parsed fields is not a valid number
 */
public static void readRow(String line, int offset, int nCols, float[] data) throws IOException {
    if (line == null) {
        throw new IOException("Unable to read input dataset");
    }

    String[] elements = line.split(",");
    // A short row is a malformed dataset: report it as such before touching data,
    // rather than failing with ArrayIndexOutOfBoundsException half-way through.
    if (elements.length < nCols) {
        throw new IOException("Unable to read input dataset: expected " + nCols
                + " values but found " + elements.length);
    }

    for (int j = 0; j < nCols; j++) {
        data[offset + j] = Float.parseFloat(elements[j]);
    }
}

/**
 * Fills caller-supplied arrays from the first three lines of a text file.
 *
 * <p>The lines are parsed, in order, into {@code rowOffsets}
 * ({@code nVectors + 1} values), {@code colIndices} ({@code nNonZeroValues}
 * values) and {@code data} ({@code nNonZeroValues} values), each line being a
 * comma-separated list handled by {@code readRow}.
 *
 * <p>I/O and number-format failures are not propagated: they are passed to
 * {@code ErrorHandling.printThrowable} and the method returns, possibly
 * leaving the arrays only partly filled.
 *
 * @param dataset        path of the file to read
 * @param nVectors       one less than the number of row offsets to read
 * @param nNonZeroValues number of column indices and of values to read
 * @param rowOffsets     destination for the first line
 * @param colIndices     destination for the second line
 * @param data           destination for the third line
 */
public static void readSparseData(String dataset, int nVectors, int nNonZeroValues,
                                  long[] rowOffsets, long[] colIndices, double[] data) {
    try {
        BufferedReader bufferedReader = new BufferedReader(new FileReader(dataset));
        try {
            readRow(bufferedReader.readLine(), 0, nVectors + 1, rowOffsets);
            readRow(bufferedReader.readLine(), 0, nNonZeroValues, colIndices);
            readRow(bufferedReader.readLine(), 0, nNonZeroValues, data);
        } finally {
            // Close on every path; previously a parse failure skipped close()
            // and leaked the file handle.
            bufferedReader.close();
        }
    } catch (IOException e) {
        ErrorHandling.printThrowable(e);
    } catch (NumberFormatException e) {
        ErrorHandling.printThrowable(e);
    }
}

/**
 * Counts the comma-separated fields in a line.
 *
 * @param line the text to split on {@code ","}
 * @return the length of the array produced by {@link String#split(String)}
 */
private static int getRowLength(String line) {
    return line.split(",").length;
}

/**
 * Builds a {@code CSRNumericTable} from the first three lines of a text file.
 *
 * <p>Each line is a comma-separated list: the first holds the row offsets, the
 * second the column indices and the third the values (parsed as
 * {@code float}). The number of vectors is one less than the number of row
 * offsets, and the number of features is the largest column index found.
 *
 * <p>The parsed data is rejected unless the column-index count equals the
 * value count, the last row offset minus one equals the value count, and both
 * the feature count and the vector count are non-zero.
 *
 * @param context DAAL context passed to the table constructor
 * @param dataset path of the file to read
 * @return the table built from the parsed arrays
 * @throws IOException if the file cannot be read, has fewer than three lines,
 *                     or fails the consistency checks described above
 * @throws NumberFormatException if a field cannot be parsed as a number
 */
public static CSRNumericTable createSparseTable(DaalContext context,
                                                String dataset) throws IOException {
    String rowIndexLine;
    String columnsLine;
    String valuesLine;

    BufferedReader bufferedReader = new BufferedReader(new FileReader(dataset));
    try {
        rowIndexLine = bufferedReader.readLine();
        columnsLine = bufferedReader.readLine();
        valuesLine = bufferedReader.readLine();
    } finally {
        // Release the file handle even when reading fails.
        bufferedReader.close();
    }

    // readLine() returns null at end of file; check before getRowLength so a
    // truncated file yields IOException instead of NullPointerException.
    if (rowIndexLine == null || columnsLine == null || valuesLine == null) {
        throw new IOException("Unable to read input dataset");
    }

    int nVectors = getRowLength(rowIndexLine);
    long[] rowOffsets = new long[nVectors];
    readRow(rowIndexLine, 0, nVectors, rowOffsets);
    // There is one more row offset than there are vectors.
    nVectors = nVectors - 1;

    int nCols = getRowLength(columnsLine);
    long[] colIndices = new long[nCols];
    readRow(columnsLine, 0, nCols, colIndices);

    int nNonZeros = getRowLength(valuesLine);
    float[] data = new float[nNonZeros];
    readRow(valuesLine, 0, nNonZeros, data);

    long maxCol = 0;
    for (int i = 0; i < nCols; i++) {
        if (colIndices[i] > maxCol) {
            maxCol = colIndices[i];
        }
    }
    int nFeatures = (int) maxCol;

    if (nCols != nNonZeros || nNonZeros != (rowOffsets[nVectors] - 1)
            || nFeatures == 0 || nVectors == 0) {
        throw new IOException("Unable to read input dataset");
    }

    return new CSRNumericTable(context, data, colIndices, rowOffsets, nFeatures, nVectors);
}

/**
 * Prints expected and predicted {@code float} labels side by side on standard output.
 *
 * <p>A two-line header is written first, followed by one line per entry for at
 * most the first 20 entries of {@code groundTruth}.
 *
 * @param groundTruth           expected labels; its length bounds the number of lines
 * @param classificationResults predicted labels, read at the same indices
 * @param classificatorName     name printed in the first header line
 */
public static void printClassificationResult(float[] groundTruth, float[] classificationResults,
                                             String classificatorName) {
    System.out.println(classificatorName + " classification:");
    System.out.println("Ground truth | Classification results");

    final int shown = Math.min(groundTruth.length, 20);
    for (int row = 0; row < shown; row++) {
        float expected = groundTruth[row];
        float predicted = classificationResults[row];
        System.out.printf("%+f\t\t%+f\n", expected, predicted);
    }
}

/**
 * Prints the first column of two numeric tables side by side on standard output.
 *
 * <p>At most {@code nMaxRows} rows are read from each table (fewer when
 * {@code groundTruth} has fewer rows). Both tables are read into buffers sized
 * from the dimensions of {@code groundTruth}. If a table cannot be read, the
 * exception is passed to {@code ErrorHandling.printThrowable} and nothing else
 * is printed.
 *
 * @param groundTruth           table of expected values; defines row and column counts
 * @param classificationResults table of predicted values
 * @param header1               heading for the first printed column
 * @param header2               heading for the second printed column
 * @param message               line printed before the headings
 * @param nMaxRows              upper bound on the number of rows printed
 */
public static void printClassificationResult(NumericTable groundTruth,
                                             NumericTable classificationResults,
                                             String header1, String header2,
                                             String message, int nMaxRows) {
    final int columnCount = (int) groundTruth.getNumberOfColumns();
    final int rowCount = Math.min((int) groundTruth.getNumberOfRows(), nMaxRows);
    final int capacity = columnCount * rowCount;

    FloatBuffer expected = FloatBuffer.allocate(capacity);
    FloatBuffer predicted = FloatBuffer.allocate(capacity);
    try {
        expected = groundTruth.getBlockOfRows(0, rowCount, expected);
        predicted = classificationResults.getBlockOfRows(0, rowCount, predicted);
    } catch (IllegalAccessException e) {
        ErrorHandling.printThrowable(e);
        return;
    }

    System.out.println(message);
    System.out.println(header1 + "\t" + header2);
    for (int row = 0; row < rowCount; row++) {
        // Only the first value of each row is shown.
        final int firstInRow = row * columnCount;
        System.out.printf("%+.0f\t\t%+.0f\n", expected.get(firstInRow), predicted.get(firstInRow));
    }
}

/**
 * Prints expected and predicted {@code long} labels side by side on standard output.
 *
 * <p>A two-line header is written first, followed by one line per entry for at
 * most the first 20 entries of {@code groundTruth}.
 *
 * @param groundTruth           expected labels; its length bounds the number of lines
 * @param classificationResults predicted labels, read at the same indices
 * @param classificatorName     name printed in the first header line
 */
public static void printClassificationResult(long[] groundTruth, long[] classificationResults,
                                             String classificatorName) {
    System.out.println(classificatorName + " classification:");
    System.out.println("Ground truth | Classification results");

    final int shown = Math.min(groundTruth.length, 20);
    for (int row = 0; row < shown; row++) {
        long expected = groundTruth[row];
        long predicted = classificationResults[row];
        System.out.printf("%+d\t\t%+d\n", expected, predicted);
    }
}

/**
 * Prints expected {@code long} labels next to predicted {@code int} labels on
 * standard output.
 *
 * <p>A two-line header is written first, followed by one line per entry for at
 * most the first 20 entries of {@code groundTruth}.
 *
 * @param groundTruth           expected labels; its length bounds the number of lines
 * @param classificationResults predicted labels, read at the same indices
 * @param classificatorName     name printed in the first header line
 */
public static void printClassificationResult(long[] groundTruth, int[] classificationResults,
                                             String classificatorName) {
    System.out.println(classificatorName + " classification:");
    System.out.println("Ground truth | Classification results");

    final int shown = Math.min(groundTruth.length, 20);
    for (int row = 0; row < shown; row++) {
        long expected = groundTruth[row];
        int predicted = classificationResults[row];
        System.out.printf("%+d\t\t%+d\n", expected, predicted);
    }
}

public static void printMatrix(double[] matrix, int nCols, int nRows, String header) {
System.out.println(header);
Expand Down Expand Up @@ -397,168 +247,6 @@ public static void printNumericTables(NumericTable dataTable1, NumericTable data
System.out.println(builder.toString());
}

/**
 * Prints the last (at most 20) large item sets and their support values on
 * standard output.
 *
 * <p>Both tables are read as flat {@code int} arrays and consumed two values
 * per row: in {@code largeItemsetsTable} each row is (item-set index, item),
 * in {@code largeItemsetsSupportTable} each row is (item-set index, support).
 * The number of rows of the support table is used as the number of item sets.
 * If either table cannot be read, the exception is passed to
 * {@code ErrorHandling.printThrowable} and nothing is printed.
 *
 * @param largeItemsetsTable        table listing the items of every item set
 * @param largeItemsetsSupportTable table listing the support of every item set
 */
public static void printAprioriItemsets(HomogenNumericTable largeItemsetsTable,
                                        HomogenNumericTable largeItemsetsSupportTable) {
    final int itemRowCount = (int) largeItemsetsTable.getNumberOfRows();
    final int itemsetCount = (int) largeItemsetsSupportTable.getNumberOfRows();
    final int maxPrinted = 20;

    /* Read the (item-set index, item) pairs. */
    IntBuffer itemBuffer =
            IntBuffer.allocate(itemRowCount * (int) largeItemsetsTable.getNumberOfColumns());
    try {
        itemBuffer = largeItemsetsTable.getBlockOfRows(0, itemRowCount, itemBuffer);
    } catch (IllegalAccessException e) {
        ErrorHandling.printThrowable(e);
        return;
    }
    int[] itemPairs = new int[itemBuffer.capacity()];
    itemBuffer.get(itemPairs);

    /* Read the (item-set index, support) pairs. */
    IntBuffer supportBuffer = IntBuffer.allocate(
            itemsetCount * (int) largeItemsetsSupportTable.getNumberOfColumns());
    try {
        supportBuffer = largeItemsetsSupportTable.getBlockOfRows(0, itemsetCount, supportBuffer);
    } catch (IllegalAccessException e) {
        ErrorHandling.printThrowable(e);
        return;
    }
    int[] supportPairs = new int[supportBuffer.capacity()];
    supportBuffer.get(supportPairs);

    /* One (initially empty) item list and one zero support per item set. */
    ArrayList<ArrayList<Integer>> itemsets = new ArrayList<ArrayList<Integer>>(itemsetCount);
    ArrayList<Integer> supportByItemset = new ArrayList<Integer>(itemsetCount);
    for (int k = 0; k < itemsetCount; k++) {
        itemsets.add(new ArrayList<Integer>());
        supportByItemset.add(0);
    }

    /* Distribute the items over their item sets. */
    for (int row = 0; row < itemRowCount; row++) {
        ArrayList<Integer> target = itemsets.get(itemPairs[2 * row]);
        target.add(itemPairs[2 * row + 1]);
    }

    /* Record the support of each item set at its index. */
    for (int row = 0; row < itemsetCount; row++) {
        int itemsetIndex = supportPairs[2 * row];
        supportByItemset.set(itemsetIndex, supportPairs[2 * row + 1]);
    }

    System.out.println("\nApriori example program results");
    System.out.println("\nLast " + maxPrinted + " large itemsets: ");
    System.out.println("\nItemset\t\t\tSupport");

    int first = 0;
    if (itemsetCount > maxPrinted) {
        first = itemsetCount - maxPrinted;
    }

    for (int idx = first; idx < itemsetCount; idx++) {
        ArrayList<Integer> items = itemsets.get(idx);
        int lastPos = items.size() - 1;

        System.out.print("{");
        for (int pos = 0; pos < lastPos; pos++) {
            System.out.print(items.get(pos) + ", ");
        }
        System.out.print(items.get(lastPos) + "}\t\t");

        System.out.println(supportByItemset.get(idx));
    }
}

/**
 * Prints the last (at most 20) association rules and their confidence values
 * on standard output.
 *
 * <p>{@code leftItemsTable} and {@code rightItemsTable} are read as flat
 * {@code int} arrays and consumed two values per row, as (rule index, item).
 * {@code confidenceTable} is read as {@code float} values, one per rule; its
 * row count is used as the number of rules. If any table cannot be read, the
 * exception is passed to {@code ErrorHandling.printThrowable} and nothing is
 * printed. When there are no rules, a single notice line is printed instead.
 *
 * @param leftItemsTable  table listing the left-hand-side items of every rule
 * @param rightItemsTable table listing the right-hand-side items of every rule
 * @param confidenceTable table listing the confidence of every rule
 */
public static void printAprioriRules(HomogenNumericTable leftItemsTable,
                                     HomogenNumericTable rightItemsTable,
                                     HomogenNumericTable confidenceTable) {
    final int maxPrinted = 20;
    final int leftRowCount = (int) leftItemsTable.getNumberOfRows();
    final int rightRowCount = (int) rightItemsTable.getNumberOfRows();
    final int ruleCount = (int) confidenceTable.getNumberOfRows();

    /* Read the (rule index, item) pairs of the left-hand sides. */
    IntBuffer leftBuffer =
            IntBuffer.allocate(leftRowCount * (int) leftItemsTable.getNumberOfColumns());
    try {
        leftBuffer = leftItemsTable.getBlockOfRows(0, leftRowCount, leftBuffer);
    } catch (IllegalAccessException e) {
        ErrorHandling.printThrowable(e);
        return;
    }
    int[] leftPairs = new int[leftBuffer.capacity()];
    leftBuffer.get(leftPairs);

    /* Read the (rule index, item) pairs of the right-hand sides. */
    IntBuffer rightBuffer =
            IntBuffer.allocate(rightRowCount * (int) rightItemsTable.getNumberOfColumns());
    try {
        rightBuffer = rightItemsTable.getBlockOfRows(0, rightRowCount, rightBuffer);
    } catch (IllegalAccessException e) {
        ErrorHandling.printThrowable(e);
        return;
    }
    int[] rightPairs = new int[rightBuffer.capacity()];
    rightBuffer.get(rightPairs);

    /* Read the confidence values. */
    FloatBuffer confidenceBuffer =
            FloatBuffer.allocate(ruleCount * (int) confidenceTable.getNumberOfColumns());
    try {
        confidenceBuffer = confidenceTable.getBlockOfRows(0, ruleCount, confidenceBuffer);
    } catch (IllegalAccessException e) {
        ErrorHandling.printThrowable(e);
        return;
    }
    float[] confidenceValues = new float[confidenceBuffer.capacity()];
    confidenceBuffer.get(confidenceValues);

    ArrayList<ArrayList<Integer>> leftByRule = new ArrayList<ArrayList<Integer>>(ruleCount);
    for (int k = 0; k < ruleCount; k++) {
        leftByRule.add(new ArrayList<Integer>());
    }

    if (ruleCount == 0) {
        System.out.println("No association rules were found ");
        return;
    }

    /* Distribute the left-hand-side items over their rules. */
    for (int row = 0; row < leftRowCount; row++) {
        ArrayList<Integer> target = leftByRule.get(leftPairs[2 * row]);
        target.add(leftPairs[2 * row + 1]);
    }

    ArrayList<ArrayList<Integer>> rightByRule = new ArrayList<ArrayList<Integer>>(ruleCount);
    for (int k = 0; k < ruleCount; k++) {
        rightByRule.add(new ArrayList<Integer>());
    }

    /* Distribute the right-hand-side items over their rules. */
    for (int row = 0; row < rightRowCount; row++) {
        ArrayList<Integer> target = rightByRule.get(rightPairs[2 * row]);
        target.add(rightPairs[2 * row + 1]);
    }

    ArrayList<Float> confidenceByRule = new ArrayList<Float>(ruleCount);
    for (int k = 0; k < ruleCount; k++) {
        confidenceByRule.add(confidenceValues[k]);
    }

    System.out.println("\nLast " + maxPrinted + " association rules: ");
    System.out.println("\nRule" + "\t\t\t\tConfidence");

    int first = 0;
    if (ruleCount > maxPrinted) {
        first = ruleCount - maxPrinted;
    }

    for (int idx = first; idx < ruleCount; idx++) {
        ArrayList<Integer> lhs = leftByRule.get(idx);
        ArrayList<Integer> rhs = rightByRule.get(idx);

        System.out.print("{");
        int lastLeft = lhs.size() - 1;
        for (int pos = 0; pos < lastLeft; pos++) {
            System.out.print(lhs.get(pos) + ", ");
        }
        System.out.print(lhs.get(lastLeft) + "} => {");

        int lastRight = rhs.size() - 1;
        for (int pos = 0; pos < lastRight; pos++) {
            System.out.print(rhs.get(pos) + ", ");
        }
        System.out.print(rhs.get(lastRight) + "}\t\t");

        System.out.println(confidenceByRule.get(idx));
    }
}

public static void printALSRatings(NumericTable usersOffsetTable, NumericTable itemsOffsetTable,
NumericTable ratings) {
long nUsers = ratings.getNumberOfRows();
Expand Down