Now Supports More than 200 Algorithms.

A. Modularized the main Quasi-computational engine: 1. UF Engine 2. CB Engine 3. Hybrid Aggregator (which will soon be used for aggregating the computational results of 1. and 2.) -------------------------------------------------------------------------- B. Added more test assertions in pkg. ------------------------------------------------------------------------- C. Integrated Format Master into EngineBase. -------------------------------------------------------------------------- D. Restructured the modular architecture view of the base toward an event-driven design for future shifting. -------------------------------------------------------------------------- E. Added examples.
vishesh9131 · Oct 30, 2024 · 43a2241 · 43a2241
1 parent 2d92bee
commit 43a2241
Show file tree

Hide file tree

Showing 628 changed files with 4,517 additions and 497 deletions.
diff --git a/.gitignore b/.gitignore
@@ -25,3 +25,12 @@ docs/images/.DS_Store
 ./src/SANDBOX/Analysis/data_mother/wgtlabel.csv
 docs/images/.DS_Store
 src/SANDBOX/tempCodeRunnerFile.py
+./src/SANDBOX/Analysis/data_mother/large_network.csv
+./src/SANDBOX/Analysis/data_mother/wgtlabel.csv
+./src/SANDBOX/dataset/dressipi/train_sessions.csv
+./src/SANDBOX/recsys/hanconv/src/SANDBOX/dataset/./src/SANDBOX/Analysis/data_mother/large_network.csv
+./src/SANDBOX/Analysis/data_mother/wgtlabel.csv
+./src/SANDBOX/recsys/hanconv/src/SANDBOX/dataset/IMDB.csv/processed/data.pt
+src/SANDBOX/.DS_Store
+/src/SANDBOX/dataset
+src/SANDBOX/.DS_Store
diff --git a/engine/Tmodel.py → corerec/Tmodel.py b/engine/Tmodel.py → corerec/Tmodel.py
@@ -72,7 +72,7 @@ def compute_neighborhood_similarity(self, adjacency_matrix, x):
 
 
 # # Example usage (Do not consider it in production -vishesh)
-# input_dim = 4
+# input_dim = 4 
 # d_model = 8
 # num_layers = 2
 # num_heads = 2
@@ -83,16 +83,23 @@ def compute_neighborhood_similarity(self, adjacency_matrix, x):
 #                                  [0, 1]])  # Simplified adjacency matrix
 # graph_metrics = torch.tensor([[0.5, 0.5],
 #                               [0.5, 0.5]])  # Simplified graph metrics
-
-# model = GraphTransformerV2(num_layers=num_layers, d_model=d_model, num_heads=num_heads, d_feedforward=d_feedforward, input_dim=input_dim)
+# 
+# model = GraphTransformerV2(num_layers=num_layers, 
+#                             d_model=d_model, 
+#                             num_heads=num_heads, 
+#                             d_feedforward=d_feedforward, 
+#                             input_dim=input_dim)
+# 
 # output = model(x, adjacency_matrix, graph_metrics)
 # print(output)
 
 
 
 ########################## TestingGraphTransformersAugAttention ########################
+import torch
+from torch.nn import Module, Linear, Dropout, LayerNorm, ModuleList
+
 class TestingGraphTransformersAugAttention(Module):
-    # this mentioned method is defined in paper.. [vishesh will provide link to his paper here] 
     def __init__(self, num_layers, d_model, num_heads, d_feedforward, input_dim, num_weights=10, use_weights=True, dropout=0.1):
         super(TestingGraphTransformersAugAttention, self).__init__()
         self.num_weights = num_weights
@@ -101,63 +108,83 @@ def __init__(self, num_layers, d_model, num_heads, d_feedforward, input_dim, num
         self.output_linear = Linear(d_model, input_dim)
         self.dropout = Dropout(dropout)
         self.layer_norm = LayerNorm(d_model)
+        self.num_heads = num_heads
+        self.d_model = d_model
+
+        # Initialize weight linear layers if using weights
         if self.use_weights:
             self.weight_linears = ModuleList([Linear(input_dim, d_model) for _ in range(num_weights)])
 
-        # Attention components (Augumenting KQV(Key Value Pair) matrices)
+        # Attention components (Key-Value-Query matrices)
         self.query_linear = Linear(d_model, d_model)
         self.key_linear = Linear(d_model, d_model)
         self.value_linear = Linear(d_model, d_model)
-        self.num_heads = num_heads
-        self.d_model = d_model
-
+
     def forward(self, x, adjacency_matrix, graph_metrics, weights=None):
+        # Process adjacency matrix and compute direct, neighborhood, and graph structure scores
         adjacency_matrix = adjacency_matrix.float()
         direct_scores = adjacency_matrix @ x
         neighborhood_similarity = self.compute_neighborhood_similarity(adjacency_matrix, x)
         graph_structure_scores = graph_metrics @ x
         dng_scores = direct_scores + neighborhood_similarity + graph_structure_scores
 
+        # Use weighted linear transformations if weights are provided
         if self.use_weights and weights is not None:
-            weighted_x = torch.zeros_like(x)
-            for i, weight in enumerate(weights.T):
-                weighted_x += self.weight_linears[i](x) * weight.unsqueeze(1)
-            x = weighted_x
+            weighted_x = self.apply_weights(x, weights)
         else:
-            x = self.input_linear(x)
+            weighted_x = self.input_linear(x)
 
-        x = self.layer_norm(x)
-        x = self.modified_attention(x, dng_scores)
+        # Normalize and apply modified attention mechanism
+        weighted_x = self.layer_norm(weighted_x)
+        x = self.modified_attention(weighted_x, dng_scores)
         x = self.output_linear(x)
         x = self.dropout(x)
 
+        # Combine DNG scores and output for the final result
         final_scores = F.relu(x + dng_scores)
         return final_scores
 
+    def apply_weights(self, x, weights):
+        # Apply multiple weight transformations to input
+        weighted_x = torch.zeros_like(x)
+        for i, weight in enumerate(weights.T):
+            weighted_x += self.weight_linears[i](x) * weight.unsqueeze(1)
+        return weighted_x
+
     def modified_attention(self, x, dng_scores):
-        # Compute Q, K, V
+        # Compute Q, K, V matrices
         Q = self.query_linear(x)
         K = self.key_linear(x)
         V = self.value_linear(x)
 
-        # Compute attention scores
+        # Compute scaled attention scores
         attention_scores = torch.matmul(Q, K.transpose(-2, -1)) / (self.d_model ** 0.5)
-
-        # Integrate DNG scores into attention scores
+        
+        # Integrate DNG scores into attention mechanism
         attention_scores += dng_scores
 
-        # Apply softmax to get attention weights
+        # Normalize scores using softmax and apply to V
         attention_weights = F.softmax(attention_scores, dim=-1)
-
-        # Compute attention output
         attention_output = torch.matmul(attention_weights, V)
-
+        
         return attention_output
 
     def compute_neighborhood_similarity(self, adjacency_matrix, x):
+        # Compute intersection and union for Jaccard similarity
         intersection = adjacency_matrix @ adjacency_matrix
         row_sums = adjacency_matrix.sum(dim=1, keepdim=True)
         col_sums = adjacency_matrix.sum(dim=0, keepdim=True)
         union = row_sums + col_sums - intersection
         similarity = intersection / (union + 1e-6)
         return similarity @ x
+
+'''KEY CHANGES:
+1.	Weight Application Optimization: Added a separate method apply_weights to modularize and clean up the weight application logic, making it easier to read and maintain.
+2.	Attention Mechanism Adjustments:
+	•	The modified_attention method scales the Q·K^T attention scores by sqrt(d_model) (scaled dot-product attention) and normalizes them with softmax before applying them to V.
+	•	The integration of dng_scores with attention scores allows the model to factor in graph-specific insights into the attention weights directly.
+3.	Compute Neighborhood Similarity: This method is already well-defined, using Jaccard similarity to account for neighborhood overlap, which aligns well with understanding graph structures.
+4.	Modularization: Breaking down the logic into smaller, dedicated methods (apply_weights, modified_attention, compute_neighborhood_similarity) improves the readability and testability of the code.
+5.	Scalability Consideration: For larger graphs, consider using sparse tensors for the adjacency matrix, if needed, to improve memory efficiency.
+
+'''
diff --git a/engine/__init__.py → corerec/__init__.py b/engine/__init__.py → corerec/__init__.py
@@ -48,3 +48,6 @@
 core_rec
     Core recommendation system components.
 """
+
+import corerec.engines.unionizedFilterEngine as UF_Engine
+import corerec.engines.contentFilterEngine as CF_Engine
diff --git a/engine/async_ddp.py → corerec/async_ddp.py b/engine/async_ddp.py → corerec/async_ddp.py
@@ -3,7 +3,7 @@
 # setup, cleanup, Parameterserver, worker 
 
 
-from engine.common_import import *
+from corerec.common_import import *
 from torch.nn import MSELoss
 
 import socket

diff --git a/corerec/base_recommender.py b/corerec/base_recommender.py
@@ -0,0 +1,29 @@
+from abc import ABC, abstractmethod
+from typing import List
+
+class BaseRecommender(ABC):
+    @abstractmethod
+    def fit(self, interaction_matrix, user_ids: List[int], item_ids: List[int]):
+        """
+        Train the recommender system using the provided interaction matrix.
+        
+        Parameters:
+        - interaction_matrix (scipy.sparse matrix): User-item interaction matrix.
+        - user_ids (List[int]): List of user IDs.
+        - item_ids (List[int]): List of item IDs.
+        """
+        pass
+
+    @abstractmethod
+    def recommend(self, user_id: int, top_n: int = 10) -> List[int]:
+        """
+        Generate top-N item recommendations for a given user.
+        
+        Parameters:
+        - user_id (int): The ID of the user.
+        - top_n (int): The number of recommendations to generate.
+        
+        Returns:
+        - List[int]: List of recommended item IDs.
+        """
+        pass 
diff --git a/engine/common_import.py → corerec/common_import.py b/engine/common_import.py → corerec/common_import.py
diff --git a/corerec/config/cr_contentFilterEngine_config.py b/corerec/config/cr_contentFilterEngine_config.py
@@ -0,0 +1,6 @@
+CONFIG = {
+    "method": "tfidf",
+    "params": {
+        "feature_matrix": None
+    }
+}
diff --git a/corerec/config/cr_unionizedFilterEngine_config.py b/corerec/config/cr_unionizedFilterEngine_config.py
@@ -0,0 +1,11 @@
+CONFIG = {
+    "collaborative_filtering": {
+        "method": "matrix_factorization",  # Options: matrix_factorization, user_based_cf, etc.
+        "params": {
+            "num_factors": 20,
+            "learning_rate": 0.01,
+            "regularization": 0.02,
+            "epochs": 20
+        }
+    }
+}
diff --git a/corerec/config/recommender_config.py b/corerec/config/recommender_config.py
@@ -0,0 +1,11 @@
+CONFIG = {
+    "collaborative_filtering": {
+        "method": "matrix_factorization",  # Options: matrix_factorization, user_based_cf, etc.
+        "params": {
+            "num_factors": 20,
+            "learning_rate": 0.01,
+            "regularization": 0.02,
+            "epochs": 20
+        }
+    }
+}
diff --git a/engine/core_rec.py → corerec/core_rec.py b/engine/core_rec.py → corerec/core_rec.py
@@ -8,43 +8,43 @@
 #     5. draw_graph: A function to visualize graphs with options to highlight top nodes and recommended nodes.
 # Note: This module integrates PyTorch for model training and evaluation, and NetworkX for graph manipulation.
 # ###############################################################################################################
-from engine.common_import import *
-from engine.async_ddp import *
+from corerec.common_import import *
+from corerec.async_ddp import *
 
 # This is the Core of your model
-from engine.models import *
-from engine.Tmodel import GraphTransformerV2
-from engine.cr_pkg.gat_conv import *
-from engine.cr_pkg.gcn_conv import *
-from engine.cr_pkg.han_conv import *
-from engine.cr_pkg.sage_conv import *
+from corerec.models import *
+from corerec.Tmodel import GraphTransformerV2
+from corerec.cr_pkg.gat_conv import *
+from corerec.cr_pkg.gcn_conv import *
+from corerec.cr_pkg.han_conv import *
+from corerec.cr_pkg.sage_conv import *
 
 
 # In Emergence this will act as Organs to your model
-from engine.train import train_model
-from engine.predict import predict, explainable_predict
-from engine.metrics import jaccard_similarity, adamic_adar_index, aaj_accuracy
+from corerec.train import train_model
+from corerec.predict import predict, explainable_predict
+from corerec.metrics import jaccard_similarity, adamic_adar_index, aaj_accuracy
 
 # Importing dataloaders,dataset
-from engine.cr_utility.dataloader import *
-from engine.cr_utility.dataset import *    #dont call this its not working rn use GraphDataset ookk
+from corerec.cr_utility.dataloader import *
+from corerec.cr_utility.dataset import *    #dont call this its not working rn use GraphDataset ookk
 
 # Importing Boosters AKA Optimizers (Note: _funtional and _init_ is commented)
-from engine.cr_boosters.adam import *
-from engine.cr_boosters.nadam import *
-from engine.cr_boosters.adamax import *
-from engine.cr_boosters.optimizer import *
-from engine.cr_boosters.adadelta import *
-from engine.cr_boosters.adagrad import *
-from engine.cr_boosters.asgd import *
-from engine.cr_boosters.lbfgs import *
-from engine.cr_boosters.rmsprop import *
-from engine.cr_boosters.sgd import *
-from engine.cr_boosters.sparse_adam import *
+from corerec.cr_boosters.adam import *
+from corerec.cr_boosters.nadam import *
+from corerec.cr_boosters.adamax import *
+from corerec.cr_boosters.optimizer import *
+from corerec.cr_boosters.adadelta import *
+from corerec.cr_boosters.adagrad import *
+from corerec.cr_boosters.asgd import *
+from corerec.cr_boosters.lbfgs import *
+from corerec.cr_boosters.rmsprop import *
+from corerec.cr_boosters.sgd import *
+from corerec.cr_boosters.sparse_adam import *
 
 
 #Promoted this script to engine.cr_utility.dataset
-from engine.datasets import GraphDataset
+from corerec.datasets import GraphDataset
 
 
 #FormatMaster is the plug for corerec preprocessing to detect dataset format and category

diff --git a/engine/cr_boosters/__init__.py → corerec/cr_boosters/__init__.py b/engine/cr_boosters/__init__.py → corerec/cr_boosters/__init__.py
diff --git a/engine/cr_boosters/_functional.py → corerec/cr_boosters/_functional.py b/engine/cr_boosters/_functional.py → corerec/cr_boosters/_functional.py
diff --git a/engine/cr_boosters/_multi_tensor/__init__.py → ...rec/cr_boosters/_multi_tensor/__init__.py b/engine/cr_boosters/_multi_tensor/__init__.py → ...rec/cr_boosters/_multi_tensor/__init__.py
diff --git a/...ne/cr_boosters/_multi_tensor/__init__.pyi → ...ec/cr_boosters/_multi_tensor/__init__.pyi b/...ne/cr_boosters/_multi_tensor/__init__.pyi → ...ec/cr_boosters/_multi_tensor/__init__.pyi
diff --git a/engine/cr_boosters/adadelta.py → corerec/cr_boosters/adadelta.py b/engine/cr_boosters/adadelta.py → corerec/cr_boosters/adadelta.py
@@ -4,7 +4,7 @@
 import torch
 from torch import Tensor
 
-from engine.cr_boosters.optimizer import (
+from corerec.cr_boosters.optimizer import (
     _capturable_doc,
     _default_to_fused_or_foreach,
     _differentiable_doc,

diff --git a/engine/cr_boosters/adagrad.py → corerec/cr_boosters/adagrad.py b/engine/cr_boosters/adagrad.py → corerec/cr_boosters/adagrad.py
@@ -3,8 +3,8 @@
 
 import torch
 from torch import Tensor
-from engine.torch_utils._foreach_utils import _get_fused_kernels_supported_devices
-from engine.cr_boosters.optimizer import (
+from corerec.torch_utils._foreach_utils import _get_fused_kernels_supported_devices
+from corerec.cr_boosters.optimizer import (
     _default_to_fused_or_foreach,
     _differentiable_doc,
     _foreach_doc,

diff --git a/engine/cr_boosters/adam.py → corerec/cr_boosters/adam.py b/engine/cr_boosters/adam.py → corerec/cr_boosters/adam.py
@@ -13,7 +13,7 @@ def _get_fused_kernels_supported_devices():
         return ["mps", "cuda", "xpu", "cpu"]
 
 
-from engine.cr_boosters.optimizer import (
+from corerec.cr_boosters.optimizer import (
     _default_to_fused_or_foreach,
     _differentiable_doc,
     _disable_dynamo_if_unsupported,

diff --git a/engine/cr_boosters/adamax.py → corerec/cr_boosters/adamax.py b/engine/cr_boosters/adamax.py → corerec/cr_boosters/adamax.py
@@ -4,7 +4,7 @@
 import torch
 from torch import Tensor
 
-from engine.cr_boosters.optimizer import (
+from corerec.cr_boosters.optimizer import (
     _capturable_doc,
     _default_to_fused_or_foreach,
     _differentiable_doc,

diff --git a/engine/cr_boosters/asgd.py → corerec/cr_boosters/asgd.py b/engine/cr_boosters/asgd.py → corerec/cr_boosters/asgd.py
@@ -4,7 +4,7 @@
 import torch
 from torch import Tensor
 
-from engine.cr_boosters.optimizer import (
+from corerec.cr_boosters.optimizer import (
     _capturable_doc,
     _default_to_fused_or_foreach,
     _differentiable_doc,

diff --git a/engine/cr_boosters/lbfgs.py → corerec/cr_boosters/lbfgs.py b/engine/cr_boosters/lbfgs.py → corerec/cr_boosters/lbfgs.py
@@ -2,7 +2,7 @@
 from typing import Optional
 
 import torch
-from engine.cr_boosters.optimizer import Optimizer, ParamsT
+from corerec.cr_boosters.optimizer import Optimizer, ParamsT
 
 __all__ = ["LBFGS"]
 

diff --git a/engine/cr_boosters/lr_scheduler.py → corerec/cr_boosters/lr_scheduler.py b/engine/cr_boosters/lr_scheduler.py → corerec/cr_boosters/lr_scheduler.py
@@ -24,7 +24,7 @@
 
 from torch import inf, Tensor
 
-from engine.cr_boosters.optimizer import Optimizer
+from corerec.cr_boosters.optimizer import Optimizer
 
 __all__ = [
     "LambdaLR",

diff --git a/engine/cr_boosters/nadam.py → corerec/cr_boosters/nadam.py b/engine/cr_boosters/nadam.py → corerec/cr_boosters/nadam.py
@@ -4,7 +4,7 @@
 
 import torch
 from torch import Tensor
-from engine.cr_boosters.optimizer import (
+from corerec.cr_boosters.optimizer import (
     _capturable_doc,
     _default_to_fused_or_foreach,
     _differentiable_doc,