From f22fa5a988b86b3ba7373187e5c2c7314ba86212 Mon Sep 17 00:00:00 2001 From: Lefteris Date: Sat, 30 Apr 2016 18:34:14 +0300 Subject: [PATCH] 2016.04.30_stellar Lefteris Paraskevas [ADD]: Generated JavaDoc. [REF]: Various name changes. [REA]: Various JavaDoc fixes. -------------------------------------------------- #All comments must be removed prior to commit except from the 'Committing two milestones' info --- nbproject/project.properties | 68 ++------- src/edmodule/AbstractEDMethod.java | 5 +- src/edmodule/EDMethodPicker.java | 2 +- src/edmodule/data/EDCoWCorpus.java | 5 +- src/edmodule/data/PeakFindingCorpus.java | 14 +- src/edmodule/edcow/EDCoW.java | 19 +-- src/edmodule/peakfinding/BinsCreator.java | 11 +- .../peakfinding/OfflinePeakFinding.java | 3 +- .../peakfinding/event/PeakFindingEvent.java | 8 +- .../peakfinding/event/PeakFindingEvents.java | 5 +- src/edmodule/utils/StringDateUtils.java | 12 +- src/evaluator/EDCoWEvaluator.java | 5 +- src/evaluator/PeakFindingEvaluator.java | 10 +- src/evs/EvS.java | 4 +- src/evs/data/SentimentWindowEntity.java | 10 +- src/evs/edcow/SentimentEDCoW.java | 130 +++++++++--------- .../evaluator/SentimentEDCoWEvaluator.java | 5 +- .../SentimentPeakFindingEvaluator.java | 7 +- src/evs/peakfinding/SentimentPeakFinding.java | 16 ++- .../event/SentimentPeakFindingEvent.java | 19 +-- .../event/SentimentPeakFindingEvents.java | 7 +- src/experimenter/PeakFindingExperimenter.java | 16 +-- .../nlp/stopwords/StopWords.java | 4 +- src/utilities/Utilities.java | 9 +- src/utilities/dsretriever/MongoHandler.java | 39 +++--- .../dsretriever/TweetsRetriever.java | 4 +- 26 files changed, 212 insertions(+), 225 deletions(-) diff --git a/nbproject/project.properties b/nbproject/project.properties index 1fb2942..a53ebda 100644 --- a/nbproject/project.properties +++ b/nbproject/project.properties @@ -1,8 +1,7 @@ -file.reference.commons-cli-1.3.1-javadoc.jar=lib/commons-cli-1.3.1-javadoc.jar 
-file.reference.commons-cli-1.3.1.jar=lib/commons-cli-1.3.1.jar -#Fri Apr 29 13:47:44 EEST 2016 +#Sat Apr 30 17:10:14 EEST 2016 jnlp.offline-allowed=false javadoc.splitindex=true +file.reference.commons-cli-1.3.1-javadoc.jar=lib/commons-cli-1.3.1-javadoc.jar file.reference.twitter4j-examples-4.0.4.jar=lib/twitter4j-examples-4.0.4.jar file.reference.bson-3.0.4.jar=lib/bson-3.0.4.jar build.classes.excludes=**/*.java,**/*.form @@ -11,9 +10,7 @@ build.sysclasspath=ignore file.reference.twitter4j-media-support-4.0.4.jar=lib/twitter4j-media-support-4.0.4.jar javac.target=1.8 build.generated.dir=${build.dir}/generated -run.classpath=\ - ${javac.classpath}:\ - ${build.classes.dir} +run.classpath=${javac.classpath}\:${build.classes.dir} file.reference.mongodb-driver-3.0.4.jar=lib/mongodb-driver-3.0.4.jar file.reference.reflections-0.9.9-RC1.jar=lib/reflections-0.9.9-RC1.jar debug.test.classpath=${run.test.classpath} @@ -48,9 +45,7 @@ run.jvmargs= manifest.custom.permissions= javac.test.processorpath=${javac.test.classpath} dist.archive.excludes= -run.test.classpath=\ - ${javac.test.classpath}:\ - ${build.test.classes.dir} +run.test.classpath=${javac.test.classpath}\:${build.test.classes.dir} build.generated.sources.dir=${build.dir}/generated-sources jnlp.signing= javadoc.notree=false @@ -67,77 +62,42 @@ dist.javadoc.dir=${dist.dir}/javadoc file.reference.xom.jar=lib/xom.jar file.reference.commons-math3-3.4.jar=lib/commons-math3-3.4.jar javadoc.additionalparam= -javac.classpath=\ - ${file.reference.HAC.jar}:\ - ${file.reference.twitter4j-async-4.0.4.jar}:\ - ${file.reference.twitter4j-core-4.0.4.jar}:\ - ${file.reference.twitter4j-examples-4.0.4.jar}:\ - ${file.reference.twitter4j-media-support-4.0.4.jar}:\ - ${file.reference.twitter4j-stream-4.0.4.jar}:\ - ${file.reference.bson-3.0.4.jar}:\ - ${file.reference.commons-io-2.4.jar}:\ - ${file.reference.commons-lang3-3.1.jar}:\ - ${file.reference.commons-math3-3.4.jar}:\ - 
${file.reference.controlsfx-8.40.10-20150826.135843-344.jar}:\ - ${file.reference.gs-algo-1.2.jar}:\ - ${file.reference.gs-core-1.2.jar}:\ - ${file.reference.gs-ui-1.2.jar}:\ - ${file.reference.guava-18.0.jar}:\ - ${file.reference.javassist.jar}:\ - ${file.reference.jmod-1.2b.jar}:\ - ${file.reference.jtransforms-2.4.jar}:\ - ${file.reference.lucene-analyzers-common-4.10.2.jar}:\ - ${file.reference.lucene-core-4.10.2.jar}:\ - ${file.reference.mongodb-driver-3.0.4.jar}:\ - ${file.reference.mongodb-driver-async-3.0.4.jar}:\ - ${file.reference.mongodb-driver-core-3.0.4.jar}:\ - ${file.reference.reflections-0.9.9-RC1.jar}:\ - ${file.reference.xom.jar}:\ - ${file.reference.ejml-0.23.jar}:\ - ${file.reference.stanford-corenlp-3.5.2.jar}:\ - ${file.reference.stanford-corenlp-3.5.2-models.jar}:\ - ${file.reference.jWave_java_groovy.jar}:\ - ${file.reference.commons-cli-1.3.1-javadoc.jar}:\ - ${file.reference.commons-cli-1.3.1.jar} +javac.classpath=${file.reference.HAC.jar}\:${file.reference.twitter4j-async-4.0.4.jar}\:${file.reference.twitter4j-core-4.0.4.jar}\:${file.reference.twitter4j-examples-4.0.4.jar}\:${file.reference.twitter4j-media-support-4.0.4.jar}\:${file.reference.twitter4j-stream-4.0.4.jar}\:${file.reference.bson-3.0.4.jar}\:${file.reference.commons-io-2.4.jar}\:${file.reference.commons-lang3-3.1.jar}\:${file.reference.commons-math3-3.4.jar}\:${file.reference.controlsfx-8.40.10-20150826.135843-344.jar}\:${file.reference.gs-algo-1.2.jar}\:${file.reference.gs-core-1.2.jar}\:${file.reference.gs-ui-1.2.jar}\:${file.reference.guava-18.0.jar}\:${file.reference.javassist.jar}\:${file.reference.jmod-1.2b.jar}\:${file.reference.jtransforms-2.4.jar}\:${file.reference.lucene-analyzers-common-4.10.2.jar}\:${file.reference.lucene-core-4.10.2.jar}\:${file.reference.mongodb-driver-3.0.4.jar}\:${file.reference.mongodb-driver-async-3.0.4.jar}\:${file.reference.mongodb-driver-core-3.0.4.jar}\:${file.reference.reflections-0.9.9-RC1.jar}\:${file.reference.xom.jar}\:${file.referen
ce.ejml-0.23.jar}\:${file.reference.stanford-corenlp-3.5.2.jar}\:${file.reference.stanford-corenlp-3.5.2-models.jar}\:${file.reference.jWave_java_groovy.jar}\:${file.reference.commons-cli-1.3.1-javadoc.jar}\:${file.reference.commons-cli-1.3.1.jar} javadoc.noindex=false manifest.custom.codebase= file.reference.jtransforms-2.4.jar=lib/jtransforms-2.4.jar +file.reference.commons-cli-1.3.1.jar=lib/commons-cli-1.3.1.jar annotation.processing.enabled.in.editor=false file.reference.controlsfx-8.40.10-20150826.135843-344.jar=lib/controlsfx-8.40.10-20150826.135843-344.jar -javadoc.private=false file.reference.commons-io-2.4.jar=lib/commons-io-2.4.jar -build.test.classes.dir=${build.dir}/test/classes +javadoc.private=false javadoc.encoding=${source.encoding} +build.test.classes.dir=${build.dir}/test/classes jar.index=${jnlp.enabled} javac.compilerargs= file.reference.lucene-analyzers-common-4.10.2.jar=lib/lucene-analyzers-common-4.10.2.jar -project.license=gpl30 source.encoding=UTF-8 +project.license=gpl30 file.reference.gs-algo-1.2.jar=lib/gs-algo-1.2.jar jnlp.signing.keystore= meta.inf.dir=${src.dir}/META-INF -excludes= annotation.processing.run.all.processors=true +excludes= application.title=MSc jnlp.codebase.type=no.codebase file.reference.stanford-corenlp-3.5.2-models.jar=lib/stanford-corenlp-3.5.2-models.jar -javac.processorpath=\ - ${javac.classpath} +javac.processorpath=${javac.classpath} build.test.results.dir=${build.dir}/test/results file.reference.commons-lang3-3.1.jar=lib/commons-lang3-3.1.jar -endorsed.classpath= javadoc.use=true +endorsed.classpath= javadoc.nonavbar=false jnlp.signing.alias= javadoc.windowtitle= -javac.test.classpath=\ - ${javac.classpath}:\ - ${build.classes.dir}:\ - ${libs.junit_4.classpath}:\ - ${libs.hamcrest.classpath} +javac.test.classpath=${javac.classpath}\:${build.classes.dir}\:${libs.junit_4.classpath}\:${libs.hamcrest.classpath} file.reference.mongodb-driver-core-3.0.4.jar=lib/mongodb-driver-core-3.0.4.jar src.dir=src 
-annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output file.reference.HAC.jar=lib/HAC.jar +annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output debug.classpath=${run.classpath} jnlp.enabled=false diff --git a/src/edmodule/AbstractEDMethod.java b/src/edmodule/AbstractEDMethod.java index 29aa2b5..ecf25bf 100644 --- a/src/edmodule/AbstractEDMethod.java +++ b/src/edmodule/AbstractEDMethod.java @@ -22,7 +22,7 @@ * but every separate ED technique has to implement these 6 methods. * * @author Lefteris Paraskevas - * @version 2016.04.09_1945 + * @version 2016.04.30_1825 */ public interface AbstractEDMethod { @@ -35,7 +35,7 @@ public interface AbstractEDMethod { /** * Get the citation of the paper that introduced the used Event Detection algorithm. * @return A String containing the citation. The String must start and end - * with '
  • ' tags. + * with '<li>' tags. */ public abstract String getCitation(); @@ -55,6 +55,7 @@ public interface AbstractEDMethod { /** * Main method that starts the execution of the Event Detection algorithm. + * @throws java.lang.Exception General Exception */ public abstract void apply() throws Exception; diff --git a/src/edmodule/EDMethodPicker.java b/src/edmodule/EDMethodPicker.java index 5d11517..84015a9 100644 --- a/src/edmodule/EDMethodPicker.java +++ b/src/edmodule/EDMethodPicker.java @@ -92,7 +92,7 @@ public static void selectEDMethod(Config config, boolean showInlineInfo, List> bins = BinsCreator.createBins(corpus, config, window); PeakFindingExperimenter exper = new PeakFindingExperimenter(corpus, - bins, alpha, taph, pi, window, config); + bins, alpha, taph, pi, config); //Experiment with Taph List lines = exper.experimentUsingTaph(1, 10, 1, showInlineInfo); diff --git a/src/edmodule/data/EDCoWCorpus.java b/src/edmodule/data/EDCoWCorpus.java index 204a540..2f4dc19 100644 --- a/src/edmodule/data/EDCoWCorpus.java +++ b/src/edmodule/data/EDCoWCorpus.java @@ -40,7 +40,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.09_1959 + * @version 2016.04.30_1826 */ public class EDCoWCorpus { @@ -175,6 +175,7 @@ public final void createCorpus() { * into that time period. * @param cal A Calendar instance, already set. * @param date The date to be checked. + * @return A String key with date information. */ public final String updateMessageDistribution(Calendar cal, Date date) { String key = StringDateUtils.getDateKey(cal, date, refreshWindow); @@ -202,7 +203,7 @@ public List getTerms() { /** * Initializes and stores a list containing objects of DocumentTermFrequencyItem - * class.
    + * class.
    * More formally, each listing in this list contains a triplet with the ID of * a document, the ID of a term that the document contains and the term's * frequency. diff --git a/src/edmodule/data/PeakFindingCorpus.java b/src/edmodule/data/PeakFindingCorpus.java index a153332..b92d3b5 100644 --- a/src/edmodule/data/PeakFindingCorpus.java +++ b/src/edmodule/data/PeakFindingCorpus.java @@ -29,7 +29,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.09_2000 + * @version 2016.04.30_1826 */ public class PeakFindingCorpus { @@ -64,13 +64,13 @@ public PeakFindingCorpus(Config config, List tweets, StopWordsHandlers sw * earliest and the latest date of corpus if needed (assuming that the corpus * has some extend of sparseness). * @param window An integer indicating the time interval in which the tweets - * should be counted. All values in minutes.
    - * E.g. For 1 minute interval --> 1.
    - * For half an hour interval --> 30.
    - * For 5 hours interval --> 300. + * should be counted. All values in minutes.
    + * E.g. For 1 minute interval, 1.
    + * For half an hour interval, 30.
    + * For 5 hours interval, 300. * @return A HashMap containing the bins. - * @see BinsCreator BinsCreator class. - * @see OfflinePeakFinding OfflinePeakFinding class. + * @see edmodule.peakfinding.BinsCreator BinsCreator class. + * @see edmodule.peakfinding.OfflinePeakFinding OfflinePeakFinding class. */ public final HashMap createCorpus(int window) { //Initialize variables diff --git a/src/edmodule/edcow/EDCoW.java b/src/edmodule/edcow/EDCoW.java index 41ccc35..51ec663 100644 --- a/src/edmodule/edcow/EDCoW.java +++ b/src/edmodule/edcow/EDCoW.java @@ -30,10 +30,10 @@ /** * * @author Adrien GUILLE, ERIC Lab, University of Lyon 2 - * @email adrien.guille@univ-lyon2.fr + * email adrien.guille@univ-lyon2.fr * * @author Lefteris Paraskevas (configurations in EDCoW to omit missing components) - * @version 2016.04.09_2001 (For EvS project version alignment) + * @version 2016.04.30_1826 (For EvS project version alignment) */ public class EDCoW implements AbstractEDMethod { private final int delta; //6 @@ -51,12 +51,12 @@ public class EDCoW implements AbstractEDMethod { private long executionTime; /** - * Default constructor with minimum parameters.
    + * Default constructor with minimum parameters.
    * Delta is set to 6, gamma is set to 5, minimum term support is set to * 0.0001 and maximum term support is set to 0.01. If you wish to change the * aforementioned values use the {@link #EDCoW(int, int, int, double, double, * int, int, EDCoWCorpus) second constructor}. - * @param delta2 Delta2 value.
    + * @param delta2 Delta2 value.
    * Prime divisors of the number of documents are required as values. It must * be cross-referenced with the number of documents. More specifically, the * outcome of the division between the number of documents and this metric @@ -78,21 +78,21 @@ public EDCoW(int delta2, int timeSliceA, int timeSliceB, EDCoWCorpus corpus) { /** * Default constructor with the full set of parameters. - * @param delta1 Delta value (suggested 6).
    + * @param delta1 Delta value (suggested 6).
    * It directly affects the number of events. Increasing this value, reduces * the number of them and vice versa. - * @param delta2 Delta2 value.
    + * @param delta2 Delta2 value.
    * Prime divisors of the number of documents are required as values. It must * be cross-referenced with the number of documents. More specifically, the * outcome of the division between the number of documents and this metric * should result the number of total windows. - * @param gamma Gamma value (suggested 5).
    + * @param gamma Gamma value (suggested 5).
    * It affects the quality of the uncovered events. Values greater than 15, * seem to increase the number of the uncovered events. - * @param minTermSupport Minimum term support value (suggested 0.0001).
    + * @param minTermSupport Minimum term support value (suggested 0.0001).
    * Changing this value would result in altering the lower bound below which * a term should not be included in the keywords list of an event. - * @param maxTermSupport Maximum term support value (suggested 0.01).
    + * @param maxTermSupport Maximum term support value (suggested 0.01).
    * Changing this value would result in altering the upper bound above which * a term should not be included in the keywords list of an event. * @param timeSliceA Starting timeslice. @@ -199,6 +199,7 @@ public void apply() { * Method to run the algorithm and analyze terms and frequencies in a * specific window. * @param window The window index (0, 1, 2 etc). + * @throws java.lang.Exception General Exception. */ public void processWindow(int window) throws Exception { LinkedList keyWords = new LinkedList<>(); diff --git a/src/edmodule/peakfinding/BinsCreator.java b/src/edmodule/peakfinding/BinsCreator.java index 7c10cbd..87b8fa8 100644 --- a/src/edmodule/peakfinding/BinsCreator.java +++ b/src/edmodule/peakfinding/BinsCreator.java @@ -29,7 +29,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.02.19_1711 + * @version 2016.04.30_1826 */ public class BinsCreator { @@ -37,12 +37,13 @@ public class BinsCreator { * Method to create and return the bins needed for OfflinePeakFinding algorithm to operate. * More formally, it creates an List of BinPair objects, containing the count * of tweets in pre-specified time intervals (windows). + * @param corpus A PeakFindingCorpus object. * @param config A Configuration object. * @param refreshWindow An integer indicating the time interval in which the tweets - * should be counted.All values in minutes.
    - * E.g. For 1 minute interval --> 1.
    - * For half an hour interval --> 30.
    - * For 5 hours interval --> 300. + * should be counted.All values in minutes.
    + * E.g. For 1 minute interval --> 1.
    + * For half an hour interval --> 30.
    + * For 5 hours interval --> 300. * @return An List of BinPair objects containing the bins. * @see StringDateUtils StringDateUtils class. * @see BinPair BinPair class. diff --git a/src/edmodule/peakfinding/OfflinePeakFinding.java b/src/edmodule/peakfinding/OfflinePeakFinding.java index e8ef3eb..31540e9 100644 --- a/src/edmodule/peakfinding/OfflinePeakFinding.java +++ b/src/edmodule/peakfinding/OfflinePeakFinding.java @@ -29,7 +29,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.09_2003 + * @version 2016.04.30_1826 * * Based on [1] Marcus A. et al., "TwitInfo: Aggregating and Visualizing Microblogs * for PeakFindingEvent Exploration", CHI 2011. @@ -55,7 +55,6 @@ public class OfflinePeakFinding implements AbstractEDMethod { * @param a Alpha parameter to capture historical information. Values lower than 1 are recommended. * @param t Threshold parameter. * @param p Primary parameter indicates the first bins to be considered in calculating initial mean deviance. - * @param refreshWindow An integer representing the refresh window of every bin. * @param corpus A PeakFindingCorpus object. */ public OfflinePeakFinding(List> bins, double a, int t, int p, PeakFindingCorpus corpus) { diff --git a/src/edmodule/peakfinding/event/PeakFindingEvent.java b/src/edmodule/peakfinding/event/PeakFindingEvent.java index 382065c..9d07bd9 100644 --- a/src/edmodule/peakfinding/event/PeakFindingEvent.java +++ b/src/edmodule/peakfinding/event/PeakFindingEvent.java @@ -33,7 +33,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.09_2004 + * @version 2016.04.30_1827 */ public class PeakFindingEvent { @@ -83,7 +83,7 @@ public PeakFindingEvent(int id, Window window, List twe /** * Generates a List with the most common terms of the tweetsOfEvent that belong - * to the specific event.
    + * to the specific event.
    * More formally, it parses every single tweet of the event, tokenizes it * and stores the terms in a HashMap with their respective occurencies as * values. @@ -148,7 +148,7 @@ public final List getTweetIDs() { /** * Returns the five most common terms as a single String. * @return A String containing the five most common terms. - * @see getCommonTerms() getCommonTerms() method. + * @see #getCommonTerms() getCommonTerms() method. */ public final String getCommonTermsAsString() { if(commonTerms.isEmpty()) { @@ -167,7 +167,7 @@ public final String getCommonTermsAsString() { /** * Auxiliary method to sort a Map by value. * @param unsortedMap The Map to be sorted. - * @return A sorted List of the String keys. + * @param stemHandler A StemUtils object. */ public final void sortMapByValue(HashMap unsortedMap, StemUtils stemHandler) { //Initialize variables diff --git a/src/edmodule/peakfinding/event/PeakFindingEvents.java b/src/edmodule/peakfinding/event/PeakFindingEvents.java index bdbc375..fc45b48 100644 --- a/src/edmodule/peakfinding/event/PeakFindingEvents.java +++ b/src/edmodule/peakfinding/event/PeakFindingEvents.java @@ -28,7 +28,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.09_2005 + * @version 2016.04.30_1827 */ public class PeakFindingEvents { @@ -46,6 +46,7 @@ public class PeakFindingEvents { * @param bins A List of BinPair objects, containing all bins. * @param eventWindows A List of Window objects, containing the generated eventsTweets. * @param corpus A PeakFindingCorpus object. + * @param stemsHandler A StemUtils object. */ public PeakFindingEvents(HashMap> tweetsByWindow, List> bins, @@ -59,7 +60,7 @@ public PeakFindingEvents(HashMap> tweetsByWindow, } /** - * Returns the tweets that belong to a certain event.
    + * Returns the tweets that belong to a certain event.
    * More formally, it parses the auxiliary tweetsByWindow HashMap and appends * the relevant tweets into a String list. * @param window A Window object, the actual event. diff --git a/src/edmodule/utils/StringDateUtils.java b/src/edmodule/utils/StringDateUtils.java index 53ce378..7e47158 100644 --- a/src/edmodule/utils/StringDateUtils.java +++ b/src/edmodule/utils/StringDateUtils.java @@ -28,15 +28,15 @@ /** * * @author Lefteris Paraskevas - * @version 2016.03.27_2356 + * @version 2016.04.30_1827 */ public class StringDateUtils { /** - * Method to return a String date key, assembled in YYYYMMDD_HHMM.
    + * Method to return a String date key, assembled in YYYYMMDD_HHMM.
    * More formally, the key is constructed by a given date and is mapped - * to the nearest minute refresh window ('window' variable).
    - * E.g. for 10-minute refresh window, the key is mapped to the nearest 10-minute.
    + * to the nearest minute refresh window ('window' variable).
    + * E.g. for 10-minute refresh window, the key is mapped to the nearest 10-minute.
    * For 30-minute refresh window, the key is mapped to the nearest half-hour. * @param cal A Calendar instance. * @param date The date from which is key is going to be constructed. @@ -137,8 +137,8 @@ public final static void clearAndSetYearToMinute(Calendar c, String dateKey) { /** * Returns a complete Date object from a - * @param date - * @return + * @param date The date to be parsed in plain text. + * @return A date object from the input String. */ public final static Date getDateFromString(String date) { try { diff --git a/src/evaluator/EDCoWEvaluator.java b/src/evaluator/EDCoWEvaluator.java index af61f38..21ff688 100644 --- a/src/evaluator/EDCoWEvaluator.java +++ b/src/evaluator/EDCoWEvaluator.java @@ -36,7 +36,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.09_2017 + * @version 2016.04.30_1827 */ public class EDCoWEvaluator implements AbstractEvaluator { private int delta; @@ -69,8 +69,9 @@ public EDCoWEvaluator() { * @param timeSliceB An integer representing the ending time slice of the dataset. * @param minTermSupport A double representing the minimum term support of the terms in an event. * @param maxTermSupport A double representing the maximum term support of the terms in an event. - * @param eventList A list containing the events after the application of EDCoW algorithm. + * @param events A list containing the events after the application of EDCoW algorithm. * @param config A configuration object. + * @param stemsHandler A StemUtils object. 
*/ public EDCoWEvaluator(int delta, int delta2, int gamma, int timeSliceA, int timeSliceB, double minTermSupport, double maxTermSupport, diff --git a/src/evaluator/PeakFindingEvaluator.java b/src/evaluator/PeakFindingEvaluator.java index 98c6d5a..626de93 100644 --- a/src/evaluator/PeakFindingEvaluator.java +++ b/src/evaluator/PeakFindingEvaluator.java @@ -34,7 +34,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.09_2017 + * @version 2016.04.30_1827 */ public class PeakFindingEvaluator implements AbstractEvaluator { private final double alpha; @@ -54,9 +54,11 @@ public class PeakFindingEvaluator implements AbstractEvaluator { * @param alpha A double value representing alpha parameter. * @param taph An integer value representing taph parameter. * @param pi An integer value representing pi parameter. + * @param eventList A List containing the events found by the algorithm. * @param config A configuration object. */ - public PeakFindingEvaluator(double alpha, int taph, int pi, List eventList, Config config) { + public PeakFindingEvaluator(double alpha, int taph, int pi, + List eventList, Config config) { this.alpha = alpha; this.taph = taph; this.pi = pi; @@ -105,6 +107,8 @@ public void evaluate(boolean showInlineInfo) { /** * Run the evaluation using only the 5 most common terms of every event. + * @param showInlineInfo Boolean flag that indicates whether to show or hide + * inline information during execution. */ public void evaluateWithCommonTerms(boolean showInlineInfo) { HashSet groundTruthKeywords; @@ -163,6 +167,8 @@ public void evaluateWithCommonTerms(boolean showInlineInfo) { /** * Run the evaluation method using all the generated terms in a specific event. + * @param showInlineInfo Boolean flag that indicates whether to show or hide + * inline information during execution. 
*/ public void evaluateWithAllTerms(boolean showInlineInfo) { HashSet groundTruthKeywords; diff --git a/src/evs/EvS.java b/src/evs/EvS.java index 4a6af00..e9cad10 100644 --- a/src/evs/EvS.java +++ b/src/evs/EvS.java @@ -30,7 +30,6 @@ import experimenter.SentimentPeakFindingExperimenter; import java.io.IOException; import java.util.Scanner; -import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.ParseException; import utilities.Config; import utilities.Console; @@ -39,7 +38,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.29_1429 + * @version 2016.04.30_1828 */ public class EvS { @@ -62,6 +61,7 @@ public static void setShowMongoLoggingFlag(boolean value) { * Main method that provides a simple console input interface for the user, * if she wishes to execute the tool as a .jar executable. * @param args A list of arguments. + * @throws org.apache.commons.cli.ParseException ParseExcetion */ public static void main(String[] args) throws ParseException { diff --git a/src/evs/data/SentimentWindowEntity.java b/src/evs/data/SentimentWindowEntity.java index 8190818..2be2236 100644 --- a/src/evs/data/SentimentWindowEntity.java +++ b/src/evs/data/SentimentWindowEntity.java @@ -19,13 +19,13 @@ /** * * @author Lefteris Paraskevas - * @version 2016.03.16_1213 + * @version 2016.04.30_1828 * * Sentiment Entity class represents the main sentiment information wrapper. - * 0 --> Negative sentiment.
    - * 1 --> Neutral sentiment.
    - * 2 --> Positive sentiment.
    - * Anything different --> Irrelevant sentiment. + * 0 --> Negative sentiment.
    + * 1 --> Neutral sentiment.
    + * 2 --> Positive sentiment.
    + * Anything different --> Irrelevant sentiment. */ public class SentimentWindowEntity { private final double positivePercentage; diff --git a/src/evs/edcow/SentimentEDCoW.java b/src/evs/edcow/SentimentEDCoW.java index a5bfba0..6497c4d 100644 --- a/src/evs/edcow/SentimentEDCoW.java +++ b/src/evs/edcow/SentimentEDCoW.java @@ -29,10 +29,10 @@ /** * * @author Adrien GUILLE, ERIC Lab, University of Lyon 2 - * @email adrien.guille@univ-lyon2.fr + * email adrien.guille@univ-lyon2.fr * * @author Lefteris Paraskevas (configurations in SentimentEDCoW to omit missing components) - * @version 2016.03.28_0003 (For EDviaSA project version alignment) + * @version 2016.04.30_1828 (For EDviaSA project version alignment) */ public class SentimentEDCoW { private final int delta; //6 @@ -51,12 +51,12 @@ public class SentimentEDCoW { public SentimentEDCoWEvents events; /** - * Default constructor with minimum parameters.
    + * Default constructor with minimum parameters.
    * Delta is set to 6, gamma is set to 5, minimum term support is set to * 0.0001 and maximum term support is set to 0.01. If you wish to change the - * aforementioned values use the {@link #EDCoW(int, int, int, double, double, - * int, int, EDCoWCorpus) second constructor}. - * @param delta2 Delta2 value.
    + * aforementioned values use the {@link #SentimentEDCoW(int, int, int, double, + * double, int, int, evs.data.SentimentEDCoWCorpus, int) second constructor}. + * @param delta2 Delta2 value.
    * Prime divisors of the number of documents are required as values. It must * be cross-referenced with the number of documents. More specifically, the * outcome of the division between the number of documents and this metric @@ -64,6 +64,7 @@ public class SentimentEDCoW { * @param timeSliceA Beginning timeslice. * @param timeSliceB Ending timeslice. * @param corpus An EDCoWCorpus object. + * @param sentimentSource The source of the sentiment, internal or external. */ public SentimentEDCoW(int delta2, int timeSliceA, int timeSliceB, SentimentEDCoWCorpus corpus, int sentimentSource) { @@ -80,27 +81,29 @@ public SentimentEDCoW(int delta2, int timeSliceA, int timeSliceB, /** * Default constructor with the full set of parameters. - * @param delta1 Delta value (suggested 6).
    + * @param delta1 Delta value (suggested 6).
    * It directly affects the number of events. Increasing this value, reduces * the number of them and vice versa. - * @param delta2 Delta2 value.
    + * @param delta2 Delta2 value.
    * Prime divisors of the number of documents are required as values. It must * be cross-referenced with the number of documents. More specifically, the * outcome of the division between the number of documents and this metric * should result the number of total windows. - * @param gamma Gamma value (suggested 5).
    + * @param gamma Gamma value (suggested 5).
    * It affects the quality of the uncovered events. Values greater than 15, * seem to increase the number of the uncovered events. - * @param minTermSupport Minimum term support value (suggested 0.0001).
    + * @param minTermSupport Minimum term support value (suggested 0.0001).
    * Changing this value would result in altering the lower bound below which * a term should not be included in the keywords list of an event. - * @param maxTermSupport Maximum term support value (suggested 0.01).
    + * @param maxTermSupport Maximum term support value (suggested 0.01).
    * Changing this value would result in altering the upper bound above which * a term should not be included in the keywords list of an event. * @param timeSliceA Starting timeslice. * @param timeSliceB Ending timeslice. * @param corpus An EDCoWCorpus object. - * @see #EDCoW(int, int, int, EDCoWCorpus, int) EDCoW() minimum constructor. + * @param sentimentSource The source of the sentiment, internal or external. + * @see #SentimentEDCoW(int, int, int, evs.data.SentimentEDCoWCorpus, int) + * SentimentEDCoW() minimum constructor. */ public SentimentEDCoW(int delta1, int delta2, int gamma, double minTermSupport, double maxTermSupport, int timeSliceA, int timeSliceB, @@ -203,65 +206,66 @@ public void apply() { * Method to run the algorithm and analyze terms and frequencies in a * specific window. * @param window The window index (0, 1, 2 etc). + * @throws java.lang.Exception General Exception */ public void processWindow(int window) throws Exception { - //try{ - LinkedList keyWords = new LinkedList<>(); - Integer[] distributioni = corpus.getEDCoWCorpus().getNumberOfDocuments(); - double[] distributiond = new double[delta2]; - int startSlice = window * delta2; - int endSlice = startSlice + delta2 - 1; + + LinkedList keyWords = new LinkedList<>(); + Integer[] distributioni = corpus.getEDCoWCorpus().getNumberOfDocuments(); + double[] distributiond = new double[delta2]; + int startSlice = window * delta2; + int endSlice = startSlice + delta2 - 1; + for(int i = startSlice; i < endSlice; i++){ + distributiond[i-startSlice] = (double) distributioni[i]; + } + termDocMap.entrySet().stream().forEach((entry) -> { + Integer frequencyf[] = entry.getValue(); + double frequencyd[] = new double[delta2]; for(int i = startSlice; i < endSlice; i++){ - distributiond[i-startSlice] = (double) distributioni[i]; + frequencyd[i-startSlice] = (double) frequencyf[i]; } - termDocMap.entrySet().stream().forEach((entry) -> { - Integer frequencyf[] = entry.getValue(); - double frequencyd[] = new 
double[delta2]; - for(int i = startSlice; i < endSlice; i++){ - frequencyd[i-startSlice] = (double) frequencyf[i]; - } - keyWords.add(new SentimentEDCoWKeyword(entry.getKey(), frequencyd, delta, distributiond)); - }); - double[] autoCorrelationValues = new double[keyWords.size()]; - for(int i = 0; i < keyWords.size(); i++){ - autoCorrelationValues[i] = keyWords.get(i).getAutoCorrelation(); - } - SentimentEDCoWThreshold th1 = new SentimentEDCoWThreshold(); - double theta1 = th1.theta1(autoCorrelationValues, gamma); + keyWords.add(new SentimentEDCoWKeyword(entry.getKey(), frequencyd, delta, distributiond)); + }); + double[] autoCorrelationValues = new double[keyWords.size()]; + for(int i = 0; i < keyWords.size(); i++){ + autoCorrelationValues[i] = keyWords.get(i).getAutoCorrelation(); + } + SentimentEDCoWThreshold th1 = new SentimentEDCoWThreshold(); + double theta1 = th1.theta1(autoCorrelationValues, gamma); - // Removing trivial keywords based on theta1 - LinkedList keyWordsList1 = new LinkedList<>(); - keyWords.stream().filter((k) -> (k.getAutoCorrelation() > theta1)).forEach((k) -> { - keyWordsList1.add(k); - }); - - keyWordsList1.stream().forEach((kw1) -> { - kw1.computeCrossCorrelation(keyWordsList1); - }); - - double[][] bigMatrix = new double[keyWordsList1.size()][keyWordsList1.size()]; - for(int i=0; i < keyWordsList1.size(); i++){ - bigMatrix[i] = keyWordsList1.get(i).getCrossCorrelation(); - } + // Removing trivial keywords based on theta1 + LinkedList keyWordsList1 = new LinkedList<>(); + keyWords.stream().filter((k) -> (k.getAutoCorrelation() > theta1)).forEach((k) -> { + keyWordsList1.add(k); + }); - //Compute theta2 using the BigMatrix - double theta2 = th1.theta2(bigMatrix, gamma); - for(int i = 0; i < keyWordsList1.size(); i++){ - for(int j = i+1; j < keyWordsList1.size(); j++){ - bigMatrix[i][j] = (bigMatrix[i][j] < theta2) ? 
0 : bigMatrix[i][j]; - } + keyWordsList1.stream().forEach((kw1) -> { + kw1.computeCrossCorrelation(keyWordsList1); + }); + + double[][] bigMatrix = new double[keyWordsList1.size()][keyWordsList1.size()]; + for(int i=0; i < keyWordsList1.size(); i++){ + bigMatrix[i] = keyWordsList1.get(i).getCrossCorrelation(); + } + + //Compute theta2 using the BigMatrix + double theta2 = th1.theta2(bigMatrix, gamma); + for(int i = 0; i < keyWordsList1.size(); i++){ + for(int j = i+1; j < keyWordsList1.size(); j++){ + bigMatrix[i][j] = (bigMatrix[i][j] < theta2) ? 0 : bigMatrix[i][j]; } - SentimentEDCoWModularityDetection modularity = new SentimentEDCoWModularityDetection(keyWordsList1, bigMatrix, startSlice, endSlice); + } + SentimentEDCoWModularityDetection modularity = new SentimentEDCoWModularityDetection(keyWordsList1, bigMatrix, startSlice, endSlice); - double thresholdE = 0.1; - ArrayList finalArrCom = modularity.getCommunitiesFiltered(thresholdE); - finalArrCom.stream().map((c) -> { - System.out.println(c.getCommunitySize()); - return c; - }).forEach((c) -> { - modularity.saveEventFromCommunity(c); - }); - eventList.addAll(modularity.getEvents()); + double thresholdE = 0.1; + ArrayList finalArrCom = modularity.getCommunitiesFiltered(thresholdE); + finalArrCom.stream().map((c) -> { + System.out.println(c.getCommunitySize()); + return c; + }).forEach((c) -> { + modularity.saveEventFromCommunity(c); + }); + eventList.addAll(modularity.getEvents()); } /** diff --git a/src/evs/evaluator/SentimentEDCoWEvaluator.java b/src/evs/evaluator/SentimentEDCoWEvaluator.java index 18d6cf5..bd7a88b 100644 --- a/src/evs/evaluator/SentimentEDCoWEvaluator.java +++ b/src/evs/evaluator/SentimentEDCoWEvaluator.java @@ -36,7 +36,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.03.28_0005 + * @version 2016.04.30_1828 */ public class SentimentEDCoWEvaluator implements AbstractSentimentEvaluator { private int delta; @@ -70,8 +70,9 @@ public SentimentEDCoWEvaluator() { * @param timeSliceB An 
integer representing the ending time slice of the dataset. * @param minTermSupport A double representing the minimum term support of the terms in an event. * @param maxTermSupport A double representing the maximum term support of the terms in an event. - * @param eventList A list containing the events after the application of EDCoW algorithm. + * @param events A list containing the events after the application of EDCoW algorithm. * @param config A configuration object. + * @param stemsHandler A StemUtils object. */ public SentimentEDCoWEvaluator(int delta, int delta2, int gamma, int timeSliceA, int timeSliceB, double minTermSupport, double maxTermSupport, diff --git a/src/evs/evaluator/SentimentPeakFindingEvaluator.java b/src/evs/evaluator/SentimentPeakFindingEvaluator.java index 24181aa..c454e55 100644 --- a/src/evs/evaluator/SentimentPeakFindingEvaluator.java +++ b/src/evs/evaluator/SentimentPeakFindingEvaluator.java @@ -34,7 +34,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.09_2053 + * @version 2016.04.30_1829 */ public class SentimentPeakFindingEvaluator implements AbstractSentimentEvaluator { private final double alpha; @@ -54,6 +54,7 @@ public class SentimentPeakFindingEvaluator implements AbstractSentimentEvaluator * @param alpha A double value representing alpha parameter. * @param taph An integer value representing taph parameter. * @param pi An integer value representing pi parameter. + * @param eventList A List containing the events found by the algorithm. * @param config A configuration object. */ public SentimentPeakFindingEvaluator(double alpha, int taph, int pi, @@ -106,6 +107,8 @@ public void evaluate(boolean showInlineInfo) { /** * Run the evaluation using only the 5 most common terms of every event. + * @param showInlineInfo Boolean flag that indicates whether to show or hide + * inline information during execution. 
*/ public void evaluateWithCommonTerms(boolean showInlineInfo) { HashSet groundTruthKeywords; @@ -164,6 +167,8 @@ public void evaluateWithCommonTerms(boolean showInlineInfo) { /** * Run the evaluation method using all the generated terms in a specific event. + * @param showInlineInfo Boolean flag that indicates whether to show or hide + * inline information during execution. */ public void evaluateWithAllTerms(boolean showInlineInfo) { HashSet groundTruthKeywords; diff --git a/src/evs/peakfinding/SentimentPeakFinding.java b/src/evs/peakfinding/SentimentPeakFinding.java index 7300f62..0d6608d 100644 --- a/src/evs/peakfinding/SentimentPeakFinding.java +++ b/src/evs/peakfinding/SentimentPeakFinding.java @@ -31,7 +31,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.03.28_0007 + * @version 2016.04.30_1829 * * Based on [1] Marcus A. et al., "TwitInfo: Aggregating and Visualizing * Microblogs for Event Exploration", CHI 2011. @@ -54,14 +54,18 @@ public class SentimentPeakFinding implements AbstractEDMethod { /** * Public constructor. - * @param bins Bins parameter, containing the count of tweets in pre-specified time intervals. - * @param a Alpha parameter to capture historical information. Values lower than 1 are recommended. + * @param bins Bins parameter, containing the count of tweets in pre-specified + * time intervals. + * @param a Alpha parameter to capture historical information. Values lower + * than 1 are recommended. * @param t Threshold parameter. - * @param p Primary parameter indicates the first bins to be considered in calculating initial mean deviance. - * @param refreshWindow An integer representing the refresh window of every bin. + * @param p Primary parameter indicates the first bins to be considered in + * calculating initial mean deviance. * @param corpus A PeakFindingCorpus object. + * @param sentimentSouce The source of the sentiment, internal or external. 
*/ - public SentimentPeakFinding(List> bins, double a, int t, int p, PeakFindingSentimentCorpus corpus, int sentimentSouce) { + public SentimentPeakFinding(List> bins, double a, + int t, int p, PeakFindingSentimentCorpus corpus, int sentimentSouce) { alpha = a; taph = t; pi = p; diff --git a/src/evs/peakfinding/event/SentimentPeakFindingEvent.java b/src/evs/peakfinding/event/SentimentPeakFindingEvent.java index 3cd5455..5d09265 100644 --- a/src/evs/peakfinding/event/SentimentPeakFindingEvent.java +++ b/src/evs/peakfinding/event/SentimentPeakFindingEvent.java @@ -34,7 +34,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.03.28_0009 + * @version 2016.04.30_1829 */ public class SentimentPeakFindingEvent { @@ -59,6 +59,7 @@ public class SentimentPeakFindingEvent { * @param tweetsOfEvent A List of String containing the corresponding tweetsOfEvent of the * event. * @param corpus A PeakFindingCorpus object. + * @param sentimentSource The source of sentiment (internal or external). */ public SentimentPeakFindingEvent(int id, Window window, List tweetsOfEvent, PeakFindingSentimentCorpus corpus, int sentimentSource) { @@ -85,7 +86,7 @@ public SentimentPeakFindingEvent(int id, Window window, List getWindow() { return window; } /** - * Calculates the main sentiment of an event.
    + * Calculates the main sentiment of an event.
    * More formally, it counts separately the sentiment of every tweet belonging * to a certain event and updates the field 'mainSentiment' with the appropriate * value. @@ -156,7 +157,7 @@ private void calculateSentimentStatistics(int sentimentSource) { /** * Generates a List with the most common terms of the tweetsOfEvent that belong - * to the specific event.
    + * to the specific event.
    * More formally, it parses every single tweet of the event, tokenizes it * and stores the terms in a HashMap with their respective occurencies as * values. @@ -211,14 +212,14 @@ public final List getCommonTerms() { } /** - * - * @return + * Returns all the terms of the event. + * @return A HashSet containing all event's terms. */ public final HashSet getAllTerms() { return allTerms; } /** - * - * @return + * Returns all the tweet IDs of the event. + * @return A List containing all the tweet ID's of the event. */ public final List getTweetIDs() { List ids = new ArrayList<>(); @@ -230,7 +231,7 @@ public final List getTweetIDs() { /** * Returns the five most common terms as a single String. * @return A String containing the five most common terms. - * @see getCommonTerms() getCommonTerms() method. + * @see #getCommonTerms() getCommonTerms() method. */ public final String getCommonTermsAsString() { if(commonTerms.isEmpty()) { @@ -248,7 +249,7 @@ public final String getCommonTermsAsString() { /** * Auxiliary method to sort a Map by value. * @param unsortedMap The Map to be sorted. - * @return A sorted List of the String keys. + * @param stemHandler A StemUtils object. */ public final void sortMapByValue(HashMap unsortedMap, StemUtils stemHandler) { //Initialize variables diff --git a/src/evs/peakfinding/event/SentimentPeakFindingEvents.java b/src/evs/peakfinding/event/SentimentPeakFindingEvents.java index 9c67f6b..b11acff 100644 --- a/src/evs/peakfinding/event/SentimentPeakFindingEvents.java +++ b/src/evs/peakfinding/event/SentimentPeakFindingEvents.java @@ -29,7 +29,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.03.28_0009 + * @version 2016.04.30_1829 */ public class SentimentPeakFindingEvents { @@ -47,6 +47,8 @@ public class SentimentPeakFindingEvents { * @param bins A List of BinPair objects, containing all bins. * @param eventWindows A List of Window objects, containing the generated eventsTweets. * @param corpus A PeakFindingCorpus object. 
+ * @param stemsHandler A StemUtils object. + * @param sentimentSouce The source of sentiment, internal or external. */ public SentimentPeakFindingEvents(HashMap> tweetsByWindow, List> bins, @@ -61,7 +63,7 @@ public SentimentPeakFindingEvents(HashMap> tweetsByWind } /** - * Returns the tweets that belong to a certain event.
    + * Returns the tweets that belong to a certain event.
    * More formally, it parses the auxiliary tweetsByWindow HashMap and appends * the relevant tweets into a String list. * @param window A Window object, the actual event. @@ -86,6 +88,7 @@ public final ArrayList getTweetsOfEvent(Window window) /** * Method to generate all subsequent events and create a list of them for * future use. + * @param sentimentSouce The source of sentiment, internal or external. */ public final void generateEvents(int sentimentSouce) { eventWindows.stream().map((window) -> new ArrayList<>(getTweetsOfEvent(window))).forEach((tweets) -> { diff --git a/src/experimenter/PeakFindingExperimenter.java b/src/experimenter/PeakFindingExperimenter.java index 32d3b8c..9d99f30 100644 --- a/src/experimenter/PeakFindingExperimenter.java +++ b/src/experimenter/PeakFindingExperimenter.java @@ -29,7 +29,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.09_1944 + * @version 2016.04.30_1830 */ public class PeakFindingExperimenter { private final PeakFindingCorpus corpus; @@ -37,18 +37,14 @@ public class PeakFindingExperimenter { private double alpha; private int taph; private final int pi; - private final int window; private final Config config; - public PeakFindingExperimenter(PeakFindingCorpus corpus, - List> bins, double alpha, int taph, int pi, - int window, Config config) { + public PeakFindingExperimenter(PeakFindingCorpus corpus, List> bins, double alpha, int taph, int pi, Config config) { this.corpus = corpus; this.bins = bins; this.alpha = alpha; this.taph = taph; this.pi = pi; - this.window = window; this.config = config; } @@ -58,8 +54,8 @@ public PeakFindingExperimenter(PeakFindingCorpus corpus, * @param start An integer representing the starting point of taph parameter. * @param end An integer representing the ending point of taph parameter. * @param step An integer representing the increase step of taph between iterations. - * @param sentimentSource The source of sentiment. 
0 represents SST, 1 Naive - * Bayes and 2 Bayesian Network. + * @param showInlineInfo A boolean flag that indicates whether to show or hide + * inline information during execution. * @return A List of String representing the lines to be exported to the file. */ public final List experimentUsingTaph(int start, int end, int step, @@ -104,8 +100,8 @@ public final List experimentUsingTaph(int start, int end, int step, * @param start An integer representing the starting point of taph parameter. * @param end An integer representing the ending point of taph parameter. * @param step An integer representing the increase step of taph between iterations. - * @param sentimentSource The source of sentiment. 0 represents SST, 1 Naive - * Bayes and 2 Bayesian Network. + * @param showInlineInfo Boolean flag that indicates whether to show or hide + * inline information during execution. * @return A List of String representing the lines to be exported to the file. */ public final List experimentUsingAlpha(double start, double end, double step, diff --git a/src/preprocessingmodule/nlp/stopwords/StopWords.java b/src/preprocessingmodule/nlp/stopwords/StopWords.java index 4ba1c92..dea74df 100644 --- a/src/preprocessingmodule/nlp/stopwords/StopWords.java +++ b/src/preprocessingmodule/nlp/stopwords/StopWords.java @@ -32,7 +32,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.01.31_1921 + * @version 2016.04.30_1831 */ public final class StopWords { @@ -42,7 +42,7 @@ public final class StopWords { /** * Public constructor that initializes a HashSet. - * @param config + * @param config A Config object. 
*/ public StopWords(Config config) { this.config = config; diff --git a/src/utilities/Utilities.java b/src/utilities/Utilities.java index 5b3dda0..6ac5099 100644 --- a/src/utilities/Utilities.java +++ b/src/utilities/Utilities.java @@ -39,7 +39,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.24_1941 + * @version 2016.04.30_1831 */ public class Utilities { @@ -47,6 +47,7 @@ public class Utilities { * Prints the execution time of a current running method in seconds. * @param startTime Long representing the current System time when the method started. * @param endTime Long representing the current System time when the method finished. + * @param className The name of the class the method belongs to. * @param methodName A String containing the name of the current running method. */ public static void printExecutionTime(long startTime, long endTime, String className, String methodName) { @@ -63,7 +64,7 @@ public static void printExecutionTime(long startTime, long endTime, String class * Supplies a message to the error stream, formatting it according to a * standard form, appending '\n' escape character at the end. * @param message The message to be printed. - * @see printMessage() printMessage() method. + * @see #printMessage(java.lang.String) printMessage() method. */ public final static void printMessageln(String message) { DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); @@ -77,7 +78,7 @@ public final static void printMessageln(String message) { * Supplies a message to the error stream, formatting it according to a * standard form, without creating a new line. * @param message The message to be printed. - * @see printMessageln() printMessageln() method. + * @see #printMessageln(java.lang.String) printMessageln() method. 
*/ public final static void printMessage(String message) { DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); @@ -168,7 +169,7 @@ public static final String[] extractTermsFromFile(Config config) { * @param date A String formed in 'hh:MM PM/AM - dd MMM YYYY'. * @return A Date object */ - public static final Date stringToDate(String date, ArrayList tweet) { + public static final Date stringToDate(String date) { try { DateFormat format = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy", Locale.ENGLISH); return format.parse(date); diff --git a/src/utilities/dsretriever/MongoHandler.java b/src/utilities/dsretriever/MongoHandler.java index 49f4aae..f66ee65 100644 --- a/src/utilities/dsretriever/MongoHandler.java +++ b/src/utilities/dsretriever/MongoHandler.java @@ -36,7 +36,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.04.16_1515 + * @version 2016.04.30_1832 */ public class MongoHandler { @@ -141,17 +141,17 @@ public final void applyLangFilter(String filter) { /** * Stores a tweet retrieved previously from a 3-party source (e.g. a text file). * @param tweet An ArrayList containing the appropriate information for the - * tweet. The list must be created according to the following scheme:
    - * [0] -> Tweet ID
    - * [1] -> User (For retweets this is the original user created the tweet)
    - * [2] -> 1 if the tweet is a retweet, 0 otherwise
    - * [3] -> Text of the tweet
    - * [4] -> Date and time of the original tweet in YYYY-MM-DD HH:MM:SS.ZZZZ fashion
    - * [5] -> Number of retweets
    - * [6] -> Number of favorites
    - * [7] -> Latitude (if available, -1 otherwise)
    - * [8] -> Longitude (if available, -1 otherwise)
    - * @param config A configuration object. + * tweet. The list must be created according to the following scheme:
    + * [0] -> Tweet ID
    + * [1] -> User (For retweets this is the original user who created the tweet)
    + * [2] -> 1 if the tweet is a retweet, 0 otherwise
    + * [3] -> Text of the tweet
    + * [4] -> Date and time of the original tweet in YYYY-MM-DD HH:MM:SS.ZZZZ fashion
    + * [5] -> Number of retweets
    + * [6] -> Number of favorites
    + * [7] -> Latitude (if available, -1 otherwise)
    + * [8] -> Longitude (if available, -1 otherwise)
    + * @param tweet A List containing a tweet. * @return True if the process succeeds, false otherwise. */ public final boolean insertTweetIntoMongo(ArrayList tweet) { @@ -165,7 +165,7 @@ public final boolean insertTweetIntoMongo(ArrayList tweet) { .append(config.getTextFieldName(), Utilities.assembleText(tweet.get(2), tweet.get(3))) //Actual tweet .append(config.getDateFieldName(), - Utilities.stringToDate(tweet.get(4), tweet)) //Date published/retrieved + Utilities.stringToDate(tweet.get(4))) //Date published/retrieved .append(config.getRetweetedFieldName(), (Integer.parseInt(tweet.get(5)) > 0)) //Boolean .append(config.getRetweetsCountFieldName(), @@ -197,7 +197,7 @@ public final boolean insertTweetIntoMongo(ArrayList tweet) { .append(config.getTextFieldName(), Utilities.assembleText(tweet.get(2), tweet.get(3))) //Actual tweet .append(config.getDateFieldName(), - Utilities.stringToDate(tweet.get(4), tweet)) //Date published/retrieved + Utilities.stringToDate(tweet.get(4))) //Date published/retrieved .append(config.getRetweetedFieldName(), (Integer.parseInt(tweet.get(5)) > 0)) //Boolean .append(config.getRetweetsCountFieldName(), @@ -391,7 +391,7 @@ public void apply(final Document tweetDoc) { /** * This method parses the MongoDB store and returns the tweet that matches - * a given ID.
    + * a given ID.
    * Note: Consider creating an index on the field 'id' of the MongoDB Store. * @param id The id of the tweet to be retrieved. * @return A Tweet object containing all the information found in the document @@ -510,7 +510,7 @@ public final Tweet getATweetByIdFromMongoDBStore(long id) { } /** - * Updates an existing tweet with its sentiment information.
    + * Updates an existing tweet with its sentiment information.
    * @param id The id of the tweet to be updated * @param sentiment An integer representing the sentiment polarity of the tweet. * @param fieldName The name of the field where the sentiment will be stored. @@ -529,7 +529,7 @@ public final void updateSentiment(long id, int sentiment, String fieldName) { } /** - * Updates an existing tweet with emoticon information.
    + * Updates an existing tweet with emoticon information.
    * More formally, the method adds two new attributes namely 'posEmot' and * 'negEmot' that indicate whether or not a given tweet has any positive or * negative emoticon, with zero indicating total absence and one indicating @@ -576,6 +576,7 @@ public final boolean tweetExists(long id) { /** * Checks whether a tweet is already annotated with its stanfordSentiment. * @param id The ID of the tweet to be checked for. + * @param fieldName The MongoDB field name of ID. * @return True if the tweet is stanfordSentiment annotated, false otherwise. */ public final boolean tweetHasSentiment(long id, String fieldName) { @@ -625,12 +626,12 @@ public final boolean dropDB() { /** * Removes all retweets that are found in the MongoDB Store for which their - * original tweet is also stored in the DB.
    + * original tweet is also stored in the DB.
    * More formally, it parses all stored tweets and checks whether a specific * tweet is also a retweet. When this condition is true, the method tries to * find whether the tweet that this retweet is originated from, also exists * in the store and if so, the retweet is removed from the collection. - *
    + *
    * *WARNING:* This process is "one-way", meaning that once initiated the * retweets that are going to be removed cannot be restored back. */ diff --git a/src/utilities/dsretriever/TweetsRetriever.java b/src/utilities/dsretriever/TweetsRetriever.java index 997380e..c3fa7af 100644 --- a/src/utilities/dsretriever/TweetsRetriever.java +++ b/src/utilities/dsretriever/TweetsRetriever.java @@ -36,7 +36,7 @@ /** * * @author Lefteris Paraskevas - * @version 2016.03.27_2334 + * @version 2016.04.30_1832 */ public class TweetsRetriever { @@ -88,7 +88,7 @@ public final void retrieveTweetsById(List tweetIDs, MongoHandler mongoDB } /** - * Method that handles the Twitter streaming API.
    + * Method that handles the Twitter streaming API.
    * WARNING: Method does not terminate by itself, due to the fact that * the streamer runs in a different thread. * @param keywords The keywords for which the streamer searches for tweets.