Skip to content

Commit

Permalink
code clean up and improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
pyspark-in-action committed May 25, 2015
1 parent 96e545b commit 7e0d2ae
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 59 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.dataalgorithms.chap01.mapreduce;

import org.apache.log4j.Logger;
import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
// STEP-0: import required Java/Spark classes.
import org.dataalgorithms.util.SparkUtil;
import scala.Tuple2;
import scala.Tuple3;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaPairRDD;
Expand All @@ -14,9 +13,7 @@

import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.Collections;
import java.util.Comparator;

/**
* SparkSecondarySort class implements the secondary sort design pattern
Expand Down
106 changes: 53 additions & 53 deletions src/test/java/org/dataalgorithms/chap05/mapreduce/PairOfWordsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,79 +5,79 @@
import org.junit.Test;

/**
* This is a test class.
* This class tests the map() for Relative Frequency of words.
* This is a test class. This class tests the map() for Relative Frequency of
* words.
*
* @author Mahmoud Parsian
*
*/
public class PairOfWordsTest {

private static final int neighborWindow = 2;
private static PairOfWords pair = new PairOfWords();
private static final int neighborWindow = 2;
private static final PairOfWords pair = new PairOfWords();

@Test
public void canGetRelativeFrequency() {
int totalCount;
String value = "w1 w2 w3 w4 w5 w6";
String value = "w1 w2 w3 w4 w5 w6";
String[] tokens = value.split(" ");
if ( (tokens == null) || (tokens.length < 2) ) {
return;
if ((tokens == null) || (tokens.length < 2)) {
return;
}

for (int i = 0; i < tokens.length; i++) {
tokens[i] = tokens[i].replaceAll("\\W+","");

if(tokens[i].equals("")){
continue;
}
for (int i = 0; i < tokens.length; i++) {
tokens[i] = tokens[i].replaceAll("\\W+", "");

pair.setWord(tokens[i]);
if (tokens[i].equals("")) {
continue;
}

int start = (i - neighborWindow < 0) ? 0 : i - neighborWindow;
int end = (i + neighborWindow >= tokens.length) ? tokens.length - 1 : i + neighborWindow;
for (int j = start; j <= end; j++) {
if (j == i) continue;
pair.setNeighbor(tokens[j].replaceAll("\\W",""));
pair.setWord(tokens[i]);

System.out.println("pair="+pair+" 1");
}
pair.setNeighbor("*");
totalCount = end - start;

System.out.println("pair="+pair+ " "+ totalCount);
int start = (i - neighborWindow < 0) ? 0 : i - neighborWindow;
int end = (i + neighborWindow >= tokens.length) ? tokens.length - 1 : i + neighborWindow;
for (int j = start; j <= end; j++) {
if (j == i) {
continue;
}
pair.setNeighbor(tokens[j].replaceAll("\\W", ""));

System.out.println("pair=" + pair + " 1");
}
pair.setNeighbor("*");
totalCount = end - start;
System.out.println("pair=" + pair + " " + totalCount);
assertThat(totalCount, greaterThan(1));
}
}
}
}

/*
Sample run:
Sample run:
# java org.dataalgorithms.chap05.TestMapper
pair=(w1, w2) 1
pair=(w1, w3) 1
pair=(w1, *) 2
pair=(w2, w1) 1
pair=(w2, w3) 1
pair=(w2, w4) 1
pair=(w2, *) 3
pair=(w3, w1) 1
pair=(w3, w2) 1
pair=(w3, w4) 1
pair=(w3, w5) 1
pair=(w3, *) 4
pair=(w4, w2) 1
pair=(w4, w3) 1
pair=(w4, w5) 1
pair=(w4, w6) 1
pair=(w4, *) 4
pair=(w5, w3) 1
pair=(w5, w4) 1
pair=(w5, w6) 1
pair=(w5, *) 3
pair=(w6, w4) 1
pair=(w6, w5) 1
pair=(w6, *) 2
*/
# java org.dataalgorithms.chap05.TestMapper
pair=(w1, w2) 1
pair=(w1, w3) 1
pair=(w1, *) 2
pair=(w2, w1) 1
pair=(w2, w3) 1
pair=(w2, w4) 1
pair=(w2, *) 3
pair=(w3, w1) 1
pair=(w3, w2) 1
pair=(w3, w4) 1
pair=(w3, w5) 1
pair=(w3, *) 4
pair=(w4, w2) 1
pair=(w4, w3) 1
pair=(w4, w5) 1
pair=(w4, w6) 1
pair=(w4, *) 4
pair=(w5, w3) 1
pair=(w5, w4) 1
pair=(w5, w6) 1
pair=(w5, *) 3
pair=(w6, w4) 1
pair=(w6, w5) 1
pair=(w6, *) 2
*/
1 change: 0 additions & 1 deletion src/test/java/org/dataalgorithms/util/TestCombination.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,3 @@ public static void test1() throws Exception {
System.out.println(comb);
}
}

0 comments on commit 7e0d2ae

Please sign in to comment.