Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MovieRecommender Class #25

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@

# Created by https://www.toptal.com/developers/gitignore/api/vscode,intellij,java
# Edit at https://www.toptal.com/developers/gitignore?templates=vscode,intellij,java

### Intellij ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### Intellij Patch ###
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721

# *.iml
# modules.xml
# .idea/misc.xml
# *.ipr

# Sonarlint plugin
# https://plugins.jetbrains.com/plugin/7973-sonarlint
.idea/**/sonarlint/

# SonarQube Plugin
# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
.idea/**/sonarIssues.xml

# Markdown Navigator plugin
# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
.idea/**/markdown-navigator.xml
.idea/**/markdown-navigator-enh.xml
.idea/**/markdown-navigator/

# Cache file creation bug
# See https://youtrack.jetbrains.com/issue/JBR-2257
.idea/$CACHE_FILE$

# CodeStream plugin
# https://plugins.jetbrains.com/plugin/12206-codestream
.idea/codestream.xml

### Java ###
# Compiled class file
*.class

# Log file
*.log

# BlueJ files
*.ctxt

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
*.jar
*.war
*.nar
*.ear
*.zip
*.tar.gz
*.rar
*.gz

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

### vscode ###
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace

target
.classpath
.project
.settings/

# End of https://www.toptal.com/developers/gitignore/api/vscode,intellij,java
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package nearsoft.academy.bigdata.recommendation;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * Parses a gzip-compressed review dump into a {@code user,product,score} CSV file and
 * serves user-based recommendations from it through Mahout.
 *
 * <p>The input is read line by line. Only lines starting with
 * {@code "product/productId: "}, {@code "review/userId: "} or {@code "review/score: "}
 * are interpreted; every other line is ignored. String identifiers are mapped to
 * sequential integers (starting at 1) because the CSV handed to Mahout carries numeric
 * ids.
 *
 * <p>No thread-safety guarantee is made for instances of this class.
 */
class MovieRecommender {
    /** Intermediate CSV written by the parser and read back by the recommender. */
    private static final String CSV_PATH = "movies.csv";

    private static final String USER_PREFIX = "review/userId: ";
    private static final String SCORE_PREFIX = "review/score: ";
    private static final String PRODUCT_PREFIX = "product/productId: ";

    /** Similarity threshold passed to {@link ThresholdUserNeighborhood}. */
    private static final double NEIGHBORHOOD_THRESHOLD = 0.1;

    /** Number of items requested from the recommender per call. */
    private static final int RECOMMENDATION_COUNT = 3;

    String filePath;
    int totalUsers;
    int totalReviews;
    int totalProducts;
    Hashtable<String, Integer> users;
    Hashtable<String, Integer> products;
    Hashtable<Integer, String> productsById;

    /** Built on first use and dropped whenever the CSV is regenerated. */
    private UserBasedRecommender recommender;

    /**
     * Creates the recommender and immediately parses the given file.
     *
     * @param fileUrl path of the gzip-compressed review file
     * @throws IOException if the input cannot be read or the CSV cannot be written
     */
    MovieRecommender(String fileUrl) throws IOException {
        this.filePath = fileUrl;
        this.users = new Hashtable<String, Integer>();
        this.products = new Hashtable<String, Integer>();
        this.productsById = new Hashtable<Integer, String>();

        // Call the private loader rather than the overridable public method so a
        // subclass cannot observe a half-constructed instance.
        loadReviews();
    }

    /**
     * Re-parses {@code filePath} from scratch, replacing the counters, the id tables
     * and the generated CSV file.
     *
     * @throws IOException if the input cannot be read or the CSV cannot be written
     */
    public void processFile() throws IOException {
        loadReviews();
    }

    /** Resets all derived state, then streams the input file into the CSV file. */
    private void loadReviews() throws IOException {
        this.totalUsers = 0;
        this.totalReviews = 0;
        this.totalProducts = 0;
        this.users.clear();
        this.products.clear();
        this.productsById.clear();
        this.recommender = null;

        // Fields of the record currently being assembled; "" means "not seen yet".
        String userId = "";
        String score = "";
        String productId = "";

        try (FileInputStream file = new FileInputStream(this.filePath);
             GZIPInputStream gzipInput = new GZIPInputStream(file);
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(gzipInput, StandardCharsets.UTF_8));
             BufferedWriter writer = new BufferedWriter(new FileWriter(CSV_PATH))) {

            String line;
            while ((line = reader.readLine()) != null) {
                String value;

                // The three prefixes are mutually exclusive, so at most one branch runs.
                if ((value = fieldValue(line, USER_PREFIX)) != null) {
                    userId = value;
                    if (!users.containsKey(userId)) {
                        this.totalUsers++;
                        users.put(userId, this.totalUsers);
                    }
                } else if ((value = fieldValue(line, SCORE_PREFIX)) != null) {
                    score = value;
                    this.totalReviews++;
                } else if ((value = fieldValue(line, PRODUCT_PREFIX)) != null) {
                    productId = value;
                    if (!products.containsKey(productId)) {
                        this.totalProducts++;
                        products.put(productId, this.totalProducts);
                        productsById.put(this.totalProducts, productId);
                    }
                }

                // Emit one CSV row as soon as all three fields of a record are known.
                if (!userId.isEmpty() && !score.isEmpty() && !productId.isEmpty()) {
                    writer.write(users.get(userId) + "," + products.get(productId) + "," + score + "\n");
                    userId = "";
                    score = "";
                    productId = "";
                }
            }
        }
    }

    /**
     * Extracts the first space-delimited token that follows {@code prefix}.
     *
     * @return the token (possibly empty when the value starts with a space), or
     *         {@code null} when the line does not start with {@code prefix} or has
     *         nothing after it
     */
    private static String fieldValue(String line, String prefix) {
        if (!line.startsWith(prefix) || line.length() == prefix.length()) {
            return null;
        }
        int start = prefix.length();
        int end = line.indexOf(' ', start);
        return end < 0 ? line.substring(start) : line.substring(start, end);
    }

    /** Returns the number of distinct user ids seen in the last parse. */
    public int getTotalUsers() {
        return this.totalUsers;
    }

    /** Returns the number of {@code review/score} lines seen in the last parse. */
    public int getTotalReviews() {
        return this.totalReviews;
    }

    /** Returns the number of distinct product ids seen in the last parse. */
    public int getTotalProducts() {
        return this.totalProducts;
    }

    /**
     * Recommends up to three product ids for the given user.
     *
     * @param userId user id exactly as it appears in the input file
     * @return recommended product ids, best first as returned by Mahout; empty when the
     *         user is {@code null} or was not present in the parsed file
     * @throws IOException if the generated CSV file cannot be read
     * @throws TasteException if Mahout fails to build the model or to recommend
     */
    public List<String> getRecommendationsForUser(String userId) throws IOException, TasteException {
        List<String> recommendations = new ArrayList<String>();

        Integer internalId = (userId == null) ? null : users.get(userId);
        if (internalId == null) {
            // Unknown user: nothing to recommend (previously an unboxing NPE).
            return recommendations;
        }

        for (RecommendedItem item : recommender().recommend(internalId.longValue(), RECOMMENDATION_COUNT)) {
            // Item ids were assigned by this class as ints, so the narrowing is lossless.
            recommendations.add(productsById.get((int) item.getItemID()));
        }

        return recommendations;
    }

    /** Lazily builds the Mahout recommender so the CSV is loaded once, not per call. */
    private UserBasedRecommender recommender() throws IOException, TasteException {
        if (recommender == null) {
            DataModel model = new FileDataModel(new File(CSV_PATH));
            UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
            UserNeighborhood neighborhood =
                    new ThresholdUserNeighborhood(NEIGHBORHOOD_THRESHOLD, similarity, model);
            recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
        }
        return recommender;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class MovieRecommenderTest {
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
MovieRecommender recommender = new MovieRecommender("movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
Expand Down