Skip to content

Commit

Permalink
Merge pull request #10 from noisebridge/interaction-matrix-of-user
Browse files Browse the repository at this point in the history
Interaction matrix of user
  • Loading branch information
audiodude authored Dec 6, 2024
2 parents b4a97c3 + 29346a6 commit 35709da
Show file tree
Hide file tree
Showing 8 changed files with 297 additions and 91 deletions.
9 changes: 8 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
interaction-matrix-of-user
mv_0000001.txt
mv_0000002.txt
movie_titles.txt
myenv/

data
out
.env
.pytest_cache
__pycache__
__pycache__
main
3 changes: 2 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ python-dotenv = "~=1.0"
requests = "~=2.26"
tqdm = "~=4.66"
typer = "~=0.12"
scipy = "*"

[dev-packages]
pytest = "~=8.3"
Expand All @@ -19,4 +20,4 @@ python_version = "3.12"

[scripts]
dev = "pipenv run python -m mediabridge.main"
test = "pipenv run pytest"
test = "pipenv run pytest"
273 changes: 187 additions & 86 deletions Pipfile.lock

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions lightfm_recommendation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import numpy as np
from lightfm import LightFM
from lightfm.evaluation import auc_score, precision_at_k
from scipy.sparse import coo_matrix

# Toy data set: three parallel lists, one entry per (user, movie, rating).
user_ids = [0, 0, 1, 1, 2]
movie_ids = [0, 1, 1, 2, 2]
ratings = [5, 4, 5, 4, 5]  # Only 4s and 5s for this example

# Assemble the 3x3 sparse user-by-movie matrix from the triplets above.
coordinates = (user_ids, movie_ids)
interaction_matrix = coo_matrix((ratings, coordinates), shape=(3, 3))

# Build a LightFM model with WARP loss and fit it for 30 epochs on 2 threads.
model = LightFM(loss="warp")
model.fit(interaction_matrix, epochs=30, num_threads=2)

# Score every one of the three items for user 0, then rank best-first
# (negating the scores turns argsort's ascending order into descending).
candidate_items = np.arange(3)
scores = model.predict(0, candidate_items)
top_items = np.argsort(-scores)
print("Top recommended items for User 0:", top_items)

# Both metrics are computed against the same matrix the model was fitted on;
# each call returns per-user values that are averaged here.
precision = precision_at_k(model, interaction_matrix, k=5).mean()
auc = auc_score(model, interaction_matrix).mean()
print(f"Precision at k=5: {precision}, AUC Score: {auc}")
1 change: 0 additions & 1 deletion mediabridge/data_processing/build_matrices.py

This file was deleted.

1 change: 0 additions & 1 deletion mediabridge/data_processing/credentials

This file was deleted.

71 changes: 71 additions & 0 deletions mediabridge/data_processing/interaction_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
import pickle

import numpy as np
from scipy.sparse import coo_matrix


def list_rating_files(directory_path):
    """Yield paths of the entries in a directory whose names start with ``mv_``.

    Names are visited in sorted order. ``os.listdir`` makes no ordering
    guarantee, and callers that assign indices in first-seen order would
    otherwise get a different layout from one machine or run to the next.

    Args:
        directory_path: Directory to scan (not recursive).

    Yields:
        ``directory_path`` joined with each matching entry name.

    Raises:
        OSError: If the directory cannot be listed (raised on first iteration).
    """
    for name in sorted(os.listdir(directory_path)):
        if name.startswith("mv_"):
            yield os.path.join(directory_path, name)


def create_interaction_matrix(directory_path, num_users, num_movies):
    """Build a sparse user-by-movie matrix holding ratings of 4 or higher.

    Every file returned by ``list_rating_files(directory_path)`` is read. The
    first line is parsed as ``<movie_id>:`` and each following non-blank line
    as three comma-separated fields ``user_id,rating,<ignored>``. Ratings
    below 4 are dropped. Users get row indices in the order they are first
    seen with a kept rating; the column index is ``movie_id - 1``.

    The matrix is assembled from (row, column, value) triplets in a single
    ``coo_matrix`` call, because COO matrices do not support item assignment.
    A (user, movie) pair that appears more than once is kept as duplicate
    entries, which SciPy sums when the matrix is converted to another format.

    Args:
        directory_path: Directory containing the rating files.
        num_users: Number of rows of the resulting matrix.
        num_movies: Number of columns of the resulting matrix.

    Returns:
        A ``scipy.sparse.coo_matrix`` of dtype ``int8`` and shape
        ``(num_users, num_movies)``.

    Raises:
        ValueError: If a line cannot be parsed, or a user or movie index
            falls outside the requested shape.
        OverflowError: If a kept rating does not fit in a signed byte.
    """
    # Local import keeps this block self-contained; typed arrays store the
    # triplets far more compactly than lists of Python ints.
    from array import array

    rows = array("l")
    cols = array("l")
    values = array("b")
    user_mapper = {}

    for file_path in list_rating_files(directory_path):
        with open(file_path, "r") as file:
            # Header line looks like "123:"; movie ids are 1-based.
            movie_id = int(file.readline().strip().replace(":", ""))
            movie_idx = movie_id - 1

            for line in file:
                line = line.strip()
                if not line:
                    # Tolerate blank or trailing empty lines.
                    continue

                user_id, rating, _ = line.split(",")
                user_id = int(user_id)
                rating = int(rating)

                if rating < 4:
                    continue

                # len() is evaluated before insertion, so a new user receives
                # the next unused row index.
                user_idx = user_mapper.setdefault(user_id, len(user_mapper))

                rows.append(user_idx)
                cols.append(movie_idx)
                values.append(rating)

    return coo_matrix(
        (
            np.asarray(values, dtype=np.int8),
            (np.asarray(rows, dtype=np.int64), np.asarray(cols, dtype=np.int64)),
        ),
        shape=(num_users, num_movies),
        dtype=np.int8,
    )


def save_matrix(matrix, output_file):
    """Pickle ``matrix`` to ``output_file`` and print a confirmation line.

    The parent directory of ``output_file`` is created if it is missing, so
    a fresh checkout without an output folder does not fail on ``open``.

    Args:
        matrix: Any picklable object (the interaction matrix in practice).
        output_file: Destination path of the pickle file; an existing file
            is overwritten.

    Raises:
        OSError: If the directory cannot be created or the file not written.
    """
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        # A bare file name has no directory part; makedirs("") would raise.
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, "wb") as f:
        pickle.dump(matrix, f)
    print(f"Interaction matrix saved to {output_file}")


def main():
    """Build the interaction matrix from the data folder and pickle it.

    Paths are resolved relative to this module's location: ratings are read
    from ``../../data/`` and the result is written to
    ``interaction_matrix.pkl`` under ``../output/`` relative to that folder.
    """
    # Fixed matrix dimensions (presumably the totals of the rating data set;
    # confirm against the data actually placed in the data folder).
    num_users = 480189
    num_movies = 17770

    module_dir = os.path.dirname(__file__)
    data_directory = os.path.join(module_dir, "../../data/")
    output_file = os.path.join(
        data_directory, "../output/", "interaction_matrix.pkl"
    )

    save_matrix(
        create_interaction_matrix(data_directory, num_users, num_movies),
        output_file,
    )


# Run the matrix build only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
1 change: 0 additions & 1 deletion mediabridge/data_processing/preprocess.py

This file was deleted.

0 comments on commit 35709da

Please sign in to comment.