macrocosm-os · steffencruz · Jan 19, 2024 · Jan 16, 2024 · Jan 16, 2024 · Jan 17, 2024
diff --git a/.circleci/config.yml b/.circleci/config.yml
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -28,7 +28,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         python -m pip install flake8 pytest black
-        pip install -r requirements.txt
+        pip install -e .
         pip install -r prompting/requirements.txt
 
     - name: Lint with flake8
@@ -42,4 +42,5 @@ jobs:
         black .
     - name: Test with pytest
       run: |
-        pytest
+        # run tests in tests/ dir and only fail if there are failures or errors
+        pytest tests/ --verbose --failed-first --exitfirst --disable-warnings
diff --git a/README.md b/README.md
@@ -92,7 +92,7 @@ This repository requires python3.8 or higher. To install, simply clone this repo
 ```bash
 git clone https://github.com/opentensor/prompting.git
 cd prompting
-python -m pip install -r requirements.txt
+python -m pip install -r requirements.txt -r prompting/requirements.txt
 python -m pip install -e .
 ```
 
@@ -107,7 +107,7 @@ Prior to running a miner or validator, you must [create a wallet](https://github
 
 The validator and base miner are based on [zephyr](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta), which is a fine-tuned Mistral-7B.
 
-**To run a validator or zephyr miner you will need 40GB VRAM (we're working on bringing this down to 32).**
+**To run a validator you will need 24GB of VRAM; to run a zephyr miner you will need 18GB of VRAM.**
 ```bash
 # To run the validator
 python neurons/validator.py

diff --git a/neurons/miners/zephyr/miner.py b/neurons/miners/zephyr/miner.py
@@ -35,7 +35,7 @@ class ZephyrMiner(Miner):
     """
     Base miner which runs zephyr (https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
 
-    This requires a GPU with at least 40GB of memory.
+    This requires a GPU with at least 20GB of memory.
 
     To run this miner from the project root directory:
 
@@ -45,7 +45,7 @@ class ZephyrMiner(Miner):
     @classmethod
     def add_args(cls, parser: argparse.ArgumentParser):
         """
-        Adds OpenAI-specific arguments to the command line parser.
+        Adds arguments to the command line parser.
         """
         super().add_args(parser)
         parser.add_argument(

diff --git a/prompting/base/neuron.py b/prompting/base/neuron.py
@@ -165,11 +165,11 @@ def should_set_weights(self) -> bool:
         ) > self.config.neuron.epoch_length
 
     def save_state(self):
-        bt.logging.warning(
+        bt.logging.debug(
             "save_state() not implemented for this neuron. You can implement this function to save model checkpoints or other useful data."
         )
 
     def load_state(self):
-        bt.logging.warning(
+        bt.logging.debug(
             "load_state() not implemented for this neuron. You can implement this function to load model checkpoints or other useful data."
         )
diff --git a/prompting/forward.py b/prompting/forward.py
@@ -95,8 +95,6 @@ async def run_step(
         **response_event.__state_dict__(),
     }
 
-    bt.logging.debug(f"Step complete. Event:\n{event}")
-    # BUG: Make sure everything we log is on CPU not GPU
     log_event(self, event)
 
     return event

diff --git a/prompting/rewards/float_diff.py b/prompting/rewards/float_diff.py
@@ -36,18 +36,21 @@ def math_score(reference, completion):
         if pred is None:
             return 0.0
 
-        # Convert reference to float (this is okay because we already checked that the reference is a float)
-        # TODO: More flexible parsing of the reference (just as with the completion)
-        ref = float(reference)
-        if pred == ref:
-            return 1.0
+        try:
 
-        # Compute the difference
-        diff = abs(ref - pred)/(ref + 1e-6)
-        # Make sure the difference is between 0 and 1
-        diff = min(abs(diff), 1)
+            # Convert reference to float (this is okay because we already checked that the reference is a float)
+            # TODO: More flexible parsing of the reference (just as with the completion)
+            ref = float(reference)
+            if pred == ref:
+                return 1.0            
+            # Compute the difference
+            diff = abs(ref - pred)/(ref + 1e-6)
+            # Make sure the difference is between 0 and 1
+            diff = min(abs(diff), 1)
 
-        return 1.0 - diff
+            return 1.0 - diff
+        except Exception:
+            return 0.0
 
 
     def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:

diff --git a/prompting/rewards/relevance.py b/prompting/rewards/relevance.py
@@ -32,13 +32,17 @@ def reward(
         self, reference: str, completions: List[str]
     ) -> BatchRewardOutput:
         reference_embedding = self.model.encode(reference, to_numpy=False)
-        completions_embeddings = self.model.encode(completions, to_numpy=False)
         rewards = []
         timings = []
 
-        for emb in completions_embeddings:
+        for comp in completions:
             t0 = time.time()
-            rewards.append(cosine_similarity(reference_embedding.reshape(1, -1), emb.reshape(1, -1)))
+            score = 0
+            if comp:
+                emb = self.model.encode(comp, to_numpy=False)
+                score = cosine_similarity(reference_embedding.reshape(1, -1), emb.reshape(1, -1))
+
+            rewards.append(score)
             timings.append(time.time() - t0)
 
         output = BatchRewardOutput(

diff --git a/prompting/rewards/reward.py b/prompting/rewards/reward.py
@@ -18,6 +18,7 @@ class RewardEvent:
     """Contains rewards for all the responses in a batch"""
     model_name: str
     rewards: torch.FloatTensor
+    rewards_normalized: torch.FloatTensor
     timings: torch.FloatTensor
     model_type: RewardModelTypeEnum
     batch_time: float
@@ -27,6 +28,7 @@ class RewardEvent:
     def asdict(self) -> dict:
         return {
             f"{self.model_name}_raw_rewards": self.rewards.tolist(),
+            f"{self.model_name}_rewards": self.rewards_normalized.tolist(),
             f"{self.model_name}_timings": self.timings.tolist(),
             f"{self.model_name}_batch_time": self.batch_time,
             f"{self.model_name}_extra_info": self.extra_info,
@@ -119,6 +121,12 @@ class BatchRewardOutput:
     rewards: torch.FloatTensor
     timings: torch.FloatTensor
     extra_info: dict
+
+    def __post_init__(self):
+        if self.rewards.shape != self.timings.shape:
+            raise ValueError(f"rewards.shape {self.rewards.shape} != timings.shape {self.timings.shape}")
+
+        self.rewards_normalized = (self.rewards-self.rewards.min())/(self.rewards.max()-self.rewards.min())
 
 
 class BaseRewardModel(ABC):
@@ -153,6 +161,7 @@ def apply(self, reference: str, response_event) -> RewardEvent:
         return RewardEvent(
             model_name=self.name,
             rewards=batch_rewards_output.rewards,
+            rewards_normalized=batch_rewards_output.rewards_normalized,
             model_type=self.model_type,
             batch_time=batch_rewards_time,
             extra_info=batch_rewards_output.extra_info,

diff --git a/prompting/utils/config.py b/prompting/utils/config.py
@@ -98,7 +98,7 @@ def add_args(cls, parser):
         help="If set, we dont save events to a log file.",
         default=False,
     )
-    
+
     parser.add_argument(
         "--neuron.log_full",
         action="store_true",
@@ -287,10 +287,30 @@ def add_validator_args(cls, parser):
         "--wandb.project_name",
         type=str,
         help="The name of the project where you are sending the new run.",
-        default="synapse_agent_experiments",
+        default="alpha-validators",
+    )
+
+    parser.add_argument(
+        "--wandb.entity",
+        type=str,
+        help="The name of the wandb entity (user or team) where you are sending the new run.",
+        default="opentensor-dev",
     )
 
 
+    parser.add_argument(
+        "--neuron.query_unique_coldkeys",
+        action="store_true",
+        help="Only query a single hotkey per coldkey.",
+        default=False,
+        )
+
+    parser.add_argument(
+        "--neuron.query_unique_ips",
+        action="store_true",
+        help="Only query a single hotkey per ip.",
+        default=False,
+        )
 
 def config(cls):
     """