Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interaction matrix of user #10

Merged
merged 26 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
15ac13d
added interaction matrix
siddz415 Aug 23, 2024
05e8dc5
restructured folders
siddz415 Aug 23, 2024
d908840
made changes to the files
siddz415 Sep 1, 2024
b5143b0
added lightfm_recommendation.py
siddz415 Sep 27, 2024
3f6bc40
Merge branch 'main' into interaction-matrix-of-user
siddz415 Oct 25, 2024
1bc61e7
Merge branch 'main' of https://github.com/noisebridge/MediaBridge int…
jhanley634 Nov 9, 2024
08459c1
Merge branch 'main' into interaction-matrix-of-user
jhanley634 Nov 9, 2024
4daa1f2
Merge branch 'interaction-matrix-of-user' of https://github.com/noise…
jhanley634 Nov 9, 2024
4a174de
isort
jhanley634 Nov 9, 2024
a5f952f
Merge branch 'main' into interaction-matrix-of-user
cocomittens Nov 23, 2024
3c72e1a
Merge branch 'main' of https://github.com/noisebridge/MediaBridge int…
cocomittens Nov 23, 2024
69bd478
Remove movie titles
cocomittens Nov 23, 2024
c84efab
Merge branch 'interaction-matrix-of-user' of https://github.com/noise…
cocomittens Nov 23, 2024
05f41d0
Remove mv-01
cocomittens Nov 23, 2024
25f83dd
Remove mv-02
cocomittens Nov 23, 2024
038ad44
Remove extra else statement
cocomittens Nov 23, 2024
432df32
Add sparse matrix, fix files
cocomittens Nov 23, 2024
f04ba20
Comment unused code
cocomittens Nov 23, 2024
6d197bd
Remove unused import
cocomittens Nov 23, 2024
e6d4548
Consolidate all files and functions for interaction matrix into a mor…
audiodude Nov 23, 2024
27d303e
Update matrix
cocomittens Nov 23, 2024
26fde84
Remove errant main file
audiodude Nov 23, 2024
14d136f
Refactor file listing code
audiodude Dec 6, 2024
3655b26
Merge branch 'main' into interaction-matrix-of-user
audiodude Dec 6, 2024
713639a
Ruff format
audiodude Dec 6, 2024
29346a6
Update interaction_matrix.py
cocomittens Dec 6, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
interaction-matrix-of-user
mv_0000001.txt
mv_0000002.txt
movie_titles.txt
myenv/

data
out
.env
.pytest_cache
__pycache__
__pycache__
main
3 changes: 2 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ python-dotenv = "~=1.0"
requests = "~=2.26"
tqdm = "~=4.66"
typer = "~=0.12"
scipy = "*"

[dev-packages]
pytest = "~=8.3"
Expand All @@ -19,4 +20,4 @@ python_version = "3.12"

[scripts]
dev = "pipenv run python -m mediabridge.main"
test = "pipenv run pytest"
test = "pipenv run pytest"
273 changes: 187 additions & 86 deletions Pipfile.lock

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions lightfm_recommendation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import numpy as np
from lightfm import LightFM
from lightfm.evaluation import auc_score, precision_at_k
from scipy.sparse import coo_matrix

# Minimal end-to-end LightFM demo: build a tiny 3x3 interaction matrix, fit a
# model on it, print a ranking for one user, and print two evaluation metrics.

# Example interaction data
# The three lists are parallel: entry i says user_ids[i] gave movie_ids[i]
# the rating ratings[i].
user_ids = [0, 0, 1, 1, 2]
movie_ids = [0, 1, 1, 2, 2]
ratings = [5, 4, 5, 4, 5] # Only 4s and 5s for this example

# Create sparse interaction matrix
# Rows are indexed by user_ids and columns by movie_ids; the shape is fixed
# at 3x3 to match the three distinct ids used above.
interaction_matrix = coo_matrix((ratings, (user_ids, movie_ids)), shape=(3, 3))

# Initialize LightFM model with WARP loss
model = LightFM(loss="warp")

# Train the model
model.fit(interaction_matrix, epochs=30, num_threads=2)

# Predict scores for User 0
# Scores are requested for all three item ids (0..2); negating the scores
# before argsort puts the highest-scoring item first.
scores = model.predict(0, np.arange(3))
top_items = np.argsort(-scores)
print("Top recommended items for User 0:", top_items)


# Both metrics are computed on the same matrix the model was fitted on, so
# they describe fit to the training data rather than held-out performance.
# NOTE(review): k=5 is larger than the 3 item columns in this matrix — confirm
# how precision_at_k treats k > number of items before reading much into it.
precision = precision_at_k(model, interaction_matrix, k=5).mean()
auc = auc_score(model, interaction_matrix).mean()

print(f"Precision at k=5: {precision}, AUC Score: {auc}")
1 change: 0 additions & 1 deletion mediabridge/data_processing/build_matrices.py

This file was deleted.

1 change: 0 additions & 1 deletion mediabridge/data_processing/credentials

This file was deleted.

71 changes: 71 additions & 0 deletions mediabridge/data_processing/interaction_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
import pickle

import numpy as np
from scipy.sparse import coo_matrix


def list_rating_files(directory_path):
    """Yield the paths of entries in a directory whose names start with ``mv_``.

    Paths are produced in sorted name order. ``os.listdir`` returns entries
    in arbitrary order, so without sorting any caller that assigns indices
    in first-seen order would get results that differ between runs or
    machines.

    Args:
        directory_path: Directory to scan. Only its immediate entries are
            considered; subdirectories are not searched.

    Yields:
        ``directory_path`` joined with each matching entry name.

    Raises:
        OSError: If ``directory_path`` cannot be listed (propagated from
            ``os.listdir`` when the generator is first advanced).
    """
    for entry_name in sorted(os.listdir(directory_path)):
        if entry_name.startswith("mv_"):
            yield os.path.join(directory_path, entry_name)


def create_interaction_matrix(directory_path, num_users, num_movies):
    """Build a sparse user-by-movie matrix holding every rating of 4 or more.

    Each ``mv_*`` file in ``directory_path`` is read as a header line holding
    the movie id (optionally followed by ``:``) and then one
    ``user_id,rating,<third field>`` line per rating. Ratings below 4 are
    ignored. The column index is ``movie_id - 1``; row indices are assigned
    to user ids in the order they are first seen with a kept rating.

    The matrix is assembled from coordinate triplets and constructed once at
    the end, because ``coo_matrix`` does not support item assignment
    (``matrix[i, j] = value`` raises ``TypeError``).

    Args:
        directory_path: Directory containing the ``mv_*`` rating files.
        num_users: Number of rows of the resulting matrix.
        num_movies: Number of columns of the resulting matrix.

    Returns:
        A ``scipy.sparse.coo_matrix`` of shape ``(num_users, num_movies)``
        with dtype ``int8``. If one file lists the same user more than once,
        the last kept rating in that file wins; entries repeated across
        different files are not merged.

    Raises:
        ValueError: If a header or rating line cannot be parsed, or if a
            row/column index falls outside the requested shape (raised by
            the ``coo_matrix`` constructor).
        OSError: If the directory or one of its files cannot be read.
    """
    user_mapper = {}
    row_chunks = []
    col_chunks = []
    data_chunks = []

    for file_path in list_rating_files(directory_path):
        # Maps row index -> rating for the single movie this file describes.
        kept_ratings = {}

        with open(file_path, "r") as file:
            movie_id = int(file.readline().strip().replace(":", ""))
            movie_idx = movie_id - 1

            for line in file:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue

                user_id, rating, _ = line.split(",")
                rating = int(rating)
                if rating < 4:
                    continue

                # A user gets the next free row index the first time one of
                # their kept ratings is seen.
                user_idx = user_mapper.setdefault(int(user_id), len(user_mapper))
                kept_ratings[user_idx] = rating

        if kept_ratings:
            count = len(kept_ratings)
            # Compact per-file arrays keep peak memory well below what three
            # ever-growing Python lists would need.
            row_chunks.append(
                np.fromiter(kept_ratings.keys(), dtype=np.int32, count=count)
            )
            col_chunks.append(np.full(count, movie_idx, dtype=np.int32))
            data_chunks.append(
                np.fromiter(kept_ratings.values(), dtype=np.int8, count=count)
            )

    if not data_chunks:
        # np.concatenate rejects an empty sequence; return an all-zero matrix.
        return coo_matrix((num_users, num_movies), dtype=np.int8)

    rows = np.concatenate(row_chunks)
    cols = np.concatenate(col_chunks)
    data = np.concatenate(data_chunks)
    return coo_matrix(
        (data, (rows, cols)), shape=(num_users, num_movies), dtype=np.int8
    )


def save_matrix(matrix, output_file):
    """Pickle ``matrix`` to ``output_file`` and print a confirmation line.

    The parent directory of ``output_file`` is created first if it does not
    exist, so saving into a fresh output folder does not fail with
    ``FileNotFoundError``.

    Args:
        matrix: Any picklable object (here, the interaction matrix).
        output_file: Destination path of the pickle file.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        # A bare filename has no parent component; nothing to create then.
        os.makedirs(parent_dir, exist_ok=True)

    # NOTE: pickle files execute code on load; only unpickle this output
    # from locations you trust.
    with open(output_file, "wb") as f:
        pickle.dump(matrix, f)
    print(f"Interaction matrix saved to {output_file}")


def main():
    """Build the interaction matrix from the data directory and pickle it."""

    # Locations are resolved relative to this module: ratings are read from
    # ../../data/ and the pickle is written to the sibling ../output/ folder.
    module_dir = os.path.dirname(__file__)
    ratings_dir = os.path.join(module_dir, "../../data/")
    pickle_path = os.path.join(ratings_dir, "../output/", "interaction_matrix.pkl")

    # Fixed matrix dimensions (rows = users, columns = movies).
    # NOTE(review): presumably the totals of the dataset being processed —
    # confirm against the data source.
    total_users = 480189
    total_movies = 17770

    matrix = create_interaction_matrix(ratings_dir, total_users, total_movies)
    save_matrix(matrix, pickle_path)


# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
1 change: 0 additions & 1 deletion mediabridge/data_processing/preprocess.py

This file was deleted.

Loading