Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interaction matrix of user #10

Merged
merged 26 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
15ac13d
added interaction matrix
siddz415 Aug 23, 2024
05e8dc5
restructured folders
siddz415 Aug 23, 2024
d908840
made changes to the files
siddz415 Sep 1, 2024
b5143b0
added lightfm_recommendation.py
siddz415 Sep 27, 2024
3f6bc40
Merge branch 'main' into interaction-matrix-of-user
siddz415 Oct 25, 2024
1bc61e7
Merge branch 'main' of https://github.com/noisebridge/MediaBridge int…
jhanley634 Nov 9, 2024
08459c1
Merge branch 'main' into interaction-matrix-of-user
jhanley634 Nov 9, 2024
4daa1f2
Merge branch 'interaction-matrix-of-user' of https://github.com/noise…
jhanley634 Nov 9, 2024
4a174de
isort
jhanley634 Nov 9, 2024
a5f952f
Merge branch 'main' into interaction-matrix-of-user
cocomittens Nov 23, 2024
3c72e1a
Merge branch 'main' of https://github.com/noisebridge/MediaBridge int…
cocomittens Nov 23, 2024
69bd478
Remove movie titles
cocomittens Nov 23, 2024
c84efab
Merge branch 'interaction-matrix-of-user' of https://github.com/noise…
cocomittens Nov 23, 2024
05f41d0
Remove mv-01
cocomittens Nov 23, 2024
25f83dd
Remove mv-02
cocomittens Nov 23, 2024
038ad44
Remove extra else statement
cocomittens Nov 23, 2024
432df32
Add sparse matrix, fix files
cocomittens Nov 23, 2024
f04ba20
Comment unused code
cocomittens Nov 23, 2024
6d197bd
Remove unused import
cocomittens Nov 23, 2024
e6d4548
Consolidate all files and functions for interaction matrix into a mor…
audiodude Nov 23, 2024
27d303e
Update matrix
cocomittens Nov 23, 2024
26fde84
Remove errant main file
audiodude Nov 23, 2024
14d136f
Refactor file listing code
audiodude Dec 6, 2024
3655b26
Merge branch 'main' into interaction-matrix-of-user
audiodude Dec 6, 2024
713639a
Ruff format
audiodude Dec 6, 2024
29346a6
Update interaction_matrix.py
cocomittens Dec 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
mv_0000001.txt
mv_0000002.txt
movie_titles.txt
20 changes: 10 additions & 10 deletions data_processing/load_data.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# This script handles loading the movie titles and ratings data
import os

def load_movie_titles(filepath):
    """Load movie metadata from a comma-separated titles file.

    Each non-blank line is expected to have the form ``movie_id,year,title``.
    The line is split on the first two commas only, so a title that itself
    contains commas is kept intact.

    Args:
        filepath: Path of the titles file. It is decoded as ISO-8859-1.

    Returns:
        A list of ``[movie_id, year, title]`` lists. ``movie_id`` is an int;
        ``year`` is an int, or ``None`` when the year field is not a number.

    Raises:
        ValueError: If a non-blank line has fewer than three fields or a
            non-numeric movie id.
    """
    movie_data = []
    with open(filepath, 'r', encoding='ISO-8859-1') as file:
        for line in file:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no record; skipping them avoids a failed 3-way unpack.
                continue
            movie_id, year, title = line.split(",", 2)
            try:
                year = int(year)
            except ValueError:
                # A placeholder such as "NULL" means the year is unknown.
                year = None
            movie_data.append([int(movie_id), year, title])
    return movie_data
# def load_movie_titles(filepath):
# movie_data = []
# with open(filepath, 'r', encoding='ISO-8859-1') as file:
# for line in file:
# movie_id, year, title = line.strip().split(",", 2)
# movie_data.append([int(movie_id), int(year), title])
# return movie_data
# NOTE: load_movie_titles is not called from main.py and is not needed for the current implementation.

def load_ratings(directory_path):
    """Return the sorted names of the rating files in a directory.

    Despite its name, this function does not read any ratings; it only lists
    the directory entries whose names start with ``mv_``.

    Args:
        directory_path: Directory to scan (passed to ``os.listdir``).

    Returns:
        A sorted list of entry names (not full paths) beginning with ``mv_``.
    """
    # sorted() accepts any iterable, so no intermediate list is needed.
    return sorted(f for f in os.listdir(directory_path) if f.startswith('mv_'))
def list_rating_files(directory_path):
    """Return the sorted names of the rating files in a directory.

    Args:
        directory_path: Directory to scan (passed to ``os.listdir``). It must
            be a directory, not the path of an individual file.

    Returns:
        A sorted list of entry names (not full paths) that start with ``mv_``.
    """
    # sorted() accepts any iterable, so no intermediate list is needed.
    return sorted(f for f in os.listdir(directory_path) if f.startswith('mv_'))

9 changes: 4 additions & 5 deletions data_processing/process_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@ def create_interaction_matrix(directory_path, num_users, num_movies, files):
user_id = int(user_id)
rating = int(rating)

if rating in (4, 5):
if user_id not in user_mapper:
if rating in (4, 5) and user_id not in user_mapper: # used AND operator instead nested IF
audiodude marked this conversation as resolved.
Show resolved Hide resolved
user_mapper[user_id] = current_user_index
audiodude marked this conversation as resolved.
Show resolved Hide resolved
current_user_index += 1
user_idx = user_mapper[user_id]
user_idx = user_mapper[user_id]

interaction_matrix[user_idx, movie_idx] = rating
return interaction_matrix
interaction_matrix[user_idx, movie_idx] = rating
return interaction_matrix
31 changes: 30 additions & 1 deletion main.py
audiodude marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1 +1,30 @@
# Main entry point to run the recommendation pipeline
# Main entry point to run the recommendation pipeline
from data_processing.load_data import list_rating_files
from data_processing.process_data import create_interaction_matrix
from data_processing.save_data import save_matrix
# from db.mongo_connection import get_db_connection
# from db.movie_storage import insert_movie_data

# Configurations
data_directory = './data/'
movie_titles_file = f'{data_directory}movie_titles.txt'
output_file = f'{data_directory}interaction_matrix.pkl'
mongo_uri = 'mongodb://localhost:27017/'
db_name = 'movie_recommendation'

# Number of users and movies (dimensions passed to create_interaction_matrix)
num_users = 480189  # Example: Replace with actual value
num_movies = 17770  # Example: Replace with actual value

# Step 1: Load Data
# list_rating_files() lists the entries of a directory, so it must be given
# the data directory itself; passing movie_titles_file (a file path) makes
# os.listdir fail.
# NOTE: despite its name, movie_data holds the rating file names ('mv_*'),
# not movie title records.
movie_data = list_rating_files(data_directory)

# Step 2: Process Data
interaction_matrix = create_interaction_matrix(data_directory, num_users, num_movies, movie_data)

# Step 3: Save Data
save_matrix(interaction_matrix, output_file)

# Step 4: Store Movies in MongoDB
# db = get_db_connection(uri=mongo_uri, db_name=db_name)
# insert_movie_data(db, 'movies', movie_data)