From 4045687dfe673b9770db1f2764e3c226251415ec Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Mon, 15 Jan 2024 21:07:31 -0600 Subject: [PATCH 01/23] Adds config to select only uids with unique coldkeys/ips to prevent network caching --- prompting/utils/config.py | 15 +++++++++++- prompting/utils/uids.py | 39 +++++++++++++++++++++---------- tests/test_uids.py | 48 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 13 deletions(-) create mode 100644 tests/test_uids.py diff --git a/prompting/utils/config.py b/prompting/utils/config.py index 0c5c243b..4a19142d 100644 --- a/prompting/utils/config.py +++ b/prompting/utils/config.py @@ -97,7 +97,7 @@ def add_args(cls, parser): help="If set, we dont save events to a log file.", default=False, ) - + parser.add_argument( "--neuron.log_full", action="store_true", @@ -288,6 +288,19 @@ def add_validator_args(cls, parser): default=4096, ) + parser.add_argument( + "--neuron.query_unique_coldkeys", + action="store_true", + help="Only query a single hotkey per coldkey.", + default=False, + ) + + parser.add_argument( + "--neuron.query_unique_ips", + action="store_true", + help="Only query a single hotkey per ip.", + default=False, + ) def config(cls): """ diff --git a/prompting/utils/uids.py b/prompting/utils/uids.py index e007dffd..057a825a 100644 --- a/prompting/utils/uids.py +++ b/prompting/utils/uids.py @@ -5,13 +5,15 @@ def check_uid_availability( - metagraph: "bt.metagraph.Metagraph", uid: int, vpermit_tao_limit: int + metagraph: "bt.metagraph.Metagraph", uid: int, vpermit_tao_limit: int, coldkeys: set = None, ips: set = None, ) -> bool: """Check if uid is available. The UID should be available if it is serving and has less than vpermit_tao_limit stake Args: metagraph (:obj: bt.metagraph.Metagraph): Metagraph object uid (int): uid to be checked vpermit_tao_limit (int): Validator permit tao limit + coldkeys (set): Set of coldkeys to exclude + ips (set): Set of ips to exclude Returns: bool: True if uid is available, False otherwise """ @@ -20,11 +22,16 @@ def check_uid_availability( bt.logging.warning(f"uid: {uid} is not serving") return False # Filter validator permit > 1024 stake. - if metagraph.validator_permit[uid]: - bt.logging.warning(f"uid: {uid} has validator permit") - if metagraph.S[uid] > vpermit_tao_limit: - bt.logging.warning(f"uid: {uid} has stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") + if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit: + bt.logging.warning(f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") return False + + if coldkeys and metagraph.axons[uid].coldkey in coldkeys: + return False + + if ips and metagraph.axons[uid].ip in ips: + return False + # Available otherwise. 
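    # Reaching this point means every filter passed: the axon is serving, any
    # validator permit is below the vpermit_tao_limit stake cap, and the uid's
    # coldkey/ip does not collide with the caller-supplied exclusion sets.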
return True @@ -43,19 +50,27 @@ def get_random_uids( """ candidate_uids = [] avail_uids = [] - + coldkeys = set() + ips = set() for uid in range(self.metagraph.n.item()): if uid == self.uid: continue + uid_is_available = check_uid_availability( - self.metagraph, uid, self.config.neuron.vpermit_tao_limit + self.metagraph, uid, self.config.neuron.vpermit_tao_limit, coldkeys, ips, ) - uid_is_not_excluded = exclude is None or uid not in exclude + if not uid_is_available: + continue + + if self.config.neuron.query_unique_coldkeys: + coldkeys.add(self.metagraph.axons[uid].coldkey) + + if self.config.neuron.query_unique_ips: + ips.add(self.metagraph.axons[uid].ip) - if uid_is_available: - avail_uids.append(uid) - if uid_is_not_excluded: - candidate_uids.append(uid) + avail_uids.append(uid) + if exclude is None or uid not in exclude: + candidate_uids.append(uid) # Check if candidate_uids contain enough for querying, if not grab all avaliable uids available_uids = candidate_uids diff --git a/tests/test_uids.py b/tests/test_uids.py new file mode 100644 index 00000000..79e5b66f --- /dev/null +++ b/tests/test_uids.py @@ -0,0 +1,48 @@ + +import torch +import pytest +from types import SimpleNamespace +from prompting.utils.uids import get_random_uids + + +def make_mock_neuron(unique_coldkeys=False, unique_ips=False, vpermit_tao_limit=1000): + + axons = [ + SimpleNamespace(coldkey="a", ip="0.0.0.1", is_serving=True), + SimpleNamespace(coldkey="a", ip="0.0.0.0", is_serving=True), + SimpleNamespace(coldkey="b", ip="0.0.0.1", is_serving=True), + SimpleNamespace(coldkey="b", ip="0.0.0.0", is_serving=True), + SimpleNamespace(coldkey="c", ip="0.0.0.2", is_serving=True), + ] + metagraph = SimpleNamespace( + axons = axons, + validator_permit = torch.ones(len(axons), dtype=torch.bool), + S = torch.zeros(len(axons)), + n = torch.tensor(len(axons)) + ) + + return SimpleNamespace( + uid = 4, + config = SimpleNamespace( + neuron = SimpleNamespace( + vpermit_tao_limit = vpermit_tao_limit, + query_unique_coldkeys = unique_coldkeys, + query_unique_ips = unique_ips, + ) + ), + metagraph = metagraph + ) + +@pytest.mark.parametrize( + "unique_coldkeys, unique_ips, k, expected_result", [ + (False, False, 4, [0, 1, 2, 3]), + (True, False, 2, [0, 2]), + (False, True, 2, [0, 1]), + (True, True, 2, [0, 3]) + ]) +def test_check_uid_availability(unique_coldkeys, unique_ips, k, expected_result): + + mock_neuron = make_mock_neuron(unique_coldkeys, unique_ips) + + assert sorted(get_random_uids(mock_neuron, k).tolist()) == expected_result, "Incorrect uids returned." + From 839f5ed9969c4d21fcb447ffe5345cb8f47961e3 Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Mon, 15 Jan 2024 21:16:55 -0600 Subject: [PATCH 02/23] Rename test --- tests/test_uids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_uids.py b/tests/test_uids.py index 79e5b66f..18bcd22d 100644 --- a/tests/test_uids.py +++ b/tests/test_uids.py @@ -40,7 +40,7 @@ def make_mock_neuron(unique_coldkeys=False, unique_ips=False, vpermit_tao_limit= (False, True, 2, [0, 1]), (True, True, 2, [0, 3]) ]) -def test_check_uid_availability(unique_coldkeys, unique_ips, k, expected_result): +def test_get_random_uids(unique_coldkeys, unique_ips, k, expected_result): mock_neuron = make_mock_neuron(unique_coldkeys, unique_ips) From 356c633d64431fa61ca6d613fc38973344ab1da9 Mon Sep 17 00:00:00 2001 From: p-ferreira Date: Wed, 17 Jan 2024 16:46:28 +0000 Subject: [PATCH 03/23] Hey.... 
--- scripts/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/run.py b/scripts/run.py index d711223c..d35f5b3d 100644 --- a/scripts/run.py +++ b/scripts/run.py @@ -1,5 +1,6 @@ import subprocess +# hi mom! coldkey = 'sn1-test' netuid = 61 network = 'test' From a8c2c1e4ae9a68e86c1e71394e16ae89af32b22e Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 15:28:39 -0600 Subject: [PATCH 04/23] Set relevance to zero when completion is empty, and lower the logging level of save state to debug --- prompting/base/neuron.py | 4 ++-- prompting/rewards/relevance.py | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/prompting/base/neuron.py b/prompting/base/neuron.py index 2c85ce08..002f7a44 100644 --- a/prompting/base/neuron.py +++ b/prompting/base/neuron.py @@ -165,11 +165,11 @@ def should_set_weights(self) -> bool: ) > self.config.neuron.epoch_length def save_state(self): - bt.logging.warning( + bt.logging.debug( "save_state() not implemented for this neuron. You can implement this function to save model checkpoints or other useful data." ) def load_state(self): - bt.logging.warning( + bt.logging.debug( "load_state() not implemented for this neuron. You can implement this function to load model checkpoints or other useful data." ) diff --git a/prompting/rewards/relevance.py b/prompting/rewards/relevance.py index 072ae72b..f327f636 100644 --- a/prompting/rewards/relevance.py +++ b/prompting/rewards/relevance.py @@ -32,13 +32,17 @@ def reward( self, reference: str, completions: List[str] ) -> BatchRewardOutput: reference_embedding = self.model.encode(reference, to_numpy=False) - completions_embeddings = self.model.encode(completions, to_numpy=False) rewards = [] timings = [] - for emb in completions_embeddings: + for comp in completions: t0 = time.time() - rewards.append(cosine_similarity(reference_embedding.reshape(1, -1), emb.reshape(1, -1))) + score = 0 + if comp: + emb = self.model.encode(completions, to_numpy=False) + score = cosine_similarity(reference_embedding.reshape(1, -1), emb.reshape(1, -1)) + + rewards.append(score) timings.append(time.time() - t0) output = BatchRewardOutput( From 878a181f4576cf0d569d31711a68f98f0caffcd9 Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 15:37:08 -0600 Subject: [PATCH 05/23] Fix bug where relevance was calculated w.r.t. all completions instead of a specific completion --- prompting/rewards/relevance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/rewards/relevance.py b/prompting/rewards/relevance.py index f327f636..4fa01484 100644 --- a/prompting/rewards/relevance.py +++ b/prompting/rewards/relevance.py @@ -39,7 +39,7 @@ def reward( t0 = time.time() score = 0 if comp: - emb = self.model.encode(completions, to_numpy=False) + emb = self.model.encode(comp, to_numpy=False) score = cosine_similarity(reference_embedding.reshape(1, -1), emb.reshape(1, -1)) rewards.append(score) From 6988219845964182480656a4e9259cd0aabd9007 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Thu, 18 Jan 2024 21:49:29 +0000 Subject: [PATCH 06/23] Add prompting/requirements to pip install --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1fde95b2..b36eefc6 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ This repository requires python3.8 or higher. 
To install, simply clone this repo ```bash git clone https://github.com/opentensor/prompting.git cd prompting -python -m pip install -r requirements.txt +python -m pip install -r requirements.txt -r prompting/requirements.txt python -m pip install -e . ``` @@ -107,7 +107,7 @@ Prior to running a miner or validator, you must [create a wallet](https://github The validator and base miner are based on [zephyr](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta), which is a fine-tuned Mistral-7B. -**To run a validator or zephyr miner you will need 40GB VRAM (we're working on bringing this down to 32).** +**To run a validator or zephyr miner you will need 20GB VRAM** ```bash # To run the validator python neurons/validator.py From afe4a92da23414a43d768591e28fcdee0854ad73 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Thu, 18 Jan 2024 21:49:44 +0000 Subject: [PATCH 07/23] Fix docstrings --- neurons/miners/zephyr/miner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neurons/miners/zephyr/miner.py b/neurons/miners/zephyr/miner.py index 76c9277d..2d75aa8a 100644 --- a/neurons/miners/zephyr/miner.py +++ b/neurons/miners/zephyr/miner.py @@ -35,7 +35,7 @@ class ZephyrMiner(Miner): """ Base miner which runs zephyr (https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) - This requires a GPU with at least 40GB of memory. + This requires a GPU with at least 20GB of memory. To run this miner from the project root directory: @@ -45,7 +45,7 @@ class ZephyrMiner(Miner): @classmethod def add_args(cls, parser: argparse.ArgumentParser): """ - Adds OpenAI-specific arguments to the command line parser. + Adds arguments to the command line parser. """ super().add_args(parser) parser.add_argument( From 3192dcc3e6353c87ba1bf4b53ed32f15c4705d8d Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Thu, 18 Jan 2024 22:08:45 +0000 Subject: [PATCH 08/23] Fix saying only 20 GB VRAM needed --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b36eefc6..d28e41fd 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ Prior to running a miner or validator, you must [create a wallet](https://github The validator and base miner are based on [zephyr](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta), which is a fine-tuned Mistral-7B. 
-**To run a validator or zephyr miner you will need 20GB VRAM** +**To run a validator you will need 24GB of VRAM or 18GB of VRAM for a zephyr miner** ```bash # To run the validator python neurons/validator.py From 7da51d3bbae973244c7d9272be584af326451a0b Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 16:17:23 -0600 Subject: [PATCH 09/23] Remove template test --- tests/test_template_validator.py | 112 ------------------------------- 1 file changed, 112 deletions(-) delete mode 100644 tests/test_template_validator.py diff --git a/tests/test_template_validator.py b/tests/test_template_validator.py deleted file mode 100644 index 2ce7ac2c..00000000 --- a/tests/test_template_validator.py +++ /dev/null @@ -1,112 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# Copyright © 2023 Opentensor Foundation - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import sys -import torch -import unittest -import bittensor as bt - -from neurons.validator import Neuron as Validator -from neurons.miner import Neuron as Miner - -from prompting.protocol import Dummy -from prompting.validator.forward import forward -from prompting.utils.uids import get_random_uids -from prompting.validator.reward import get_rewards -from prompting.base.validator import BaseValidatorNeuron - - -class TemplateValidatorNeuronTestCase(unittest.TestCase): - """ - This class contains unit tests for the RewardEvent classes. - - The tests cover different scenarios where completions may or may not be successful and the reward events are checked that they don't contain missing values. - The `reward` attribute of all RewardEvents is expected to be a float, and the `is_filter_model` attribute is expected to be a boolean. 
- """ - - def setUp(self): - sys.argv = sys.argv[0] + ["--config", "tests/configs/validator.json"] - - config = BaseValidatorNeuron.config() - config.wallet._mock = True - config.metagraph._mock = True - config.subtensor._mock = True - self.neuron = Validator(config) - self.miner_uids = get_random_uids(self, k=10) - - def test_run_single_step(self): - # TODO: Test a single step - pass - - def test_sync_error_if_not_registered(self): - # TODO: Test that the validator throws an error if it is not registered on metagraph - pass - - def test_forward(self): - # TODO: Test that the forward function returns the correct value - pass - - def test_dummy_responses(self): - # TODO: Test that the dummy responses are correctly constructed - - responses = self.neuron.dendrite.query( - # Send the query to miners in the network. - axons=[self.neuron.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. - deserialize=True, - ) - - for i, response in enumerate(responses): - self.assertEqual(response, self.neuron.step * 2) - - def test_reward(self): - # TODO: Test that the reward function returns the correct value - responses = self.dendrite.query( - # Send the query to miners in the network. - axons=[self.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. - deserialize=True, - ) - - rewards = get_rewards(self.neuron, responses) - expected_rewards = torch.FloatTensor([1.0] * len(responses)) - self.assertEqual(rewards, expected_rewards) - - def test_reward_with_nan(self): - # TODO: Test that NaN rewards are correctly sanitized - # TODO: Test that a bt.logging.warning is thrown when a NaN reward is sanitized - responses = self.dendrite.query( - # Send the query to miners in the network. - axons=[self.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. - deserialize=True, - ) - - rewards = get_rewards(self.neuron, responses) - expected_rewards = rewards.clone() - # Add NaN values to rewards - rewards[0] = float("nan") - - with self.assertLogs(bt.logging, level="WARNING") as cm: - self.neuron.update_scores(rewards, self.miner_uids) From 16a398894223bdfc9fc4f61d62e72347981bb956 Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 16:59:02 -0600 Subject: [PATCH 10/23] Remove circle CI --- .circleci/config.yml | 168 ------------------------------------------- 1 file changed, 168 deletions(-) delete mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 0473afe6..00000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,168 +0,0 @@ -version: 2.1 - -orbs: - python: circleci/python@2.1.1 - python-lib: dialogue/python-lib@0.1.55 - # coveralls: coveralls/coveralls@1.0.6 - -jobs: - black: - resource_class: small - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - restore_cache: - name: Restore cached black venv - keys: - - v1-pypi-py-black-<< parameters.python-version >> - - - run: - name: Update & Activate black venv - command: | - python -m venv env/ - . 
env/bin/activate - python -m pip install --upgrade pip - pip install black - - - save_cache: - name: Save cached black venv - paths: - - "env/" - key: v1-pypi-py-black-<< parameters.python-version >> - - - run: - name: Black format check - command: | - . env/bin/activate - black --line-length 79 --exclude '(env|venv|.eggs)' --check . - - pylint: - resource_class: small - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - run: - name: Install Pylint - command: | - python -m venv env/ - . env/bin/activate - pip install pylint - - - run: - name: Pylint check - command: | - . env/bin/activate - pylint --fail-on=W,E,F --exit-zero ./ - - check_compatibility: - parameters: - python_version: - type: string - docker: - - image: cimg/python:3.10 - steps: - - checkout - - run: - name: Check if requirements files have changed - command: ./scripts/check_requirements_changes.sh - - run: - name: Install dependencies and Check compatibility - command: | - if [ "$REQUIREMENTS_CHANGED" == "true" ]; then - sudo apt-get update - sudo apt-get install -y jq curl - ./scripts/check_compatibility.sh << parameters.python_version >> - else - echo "Skipping compatibility checks..." - fi - - build: - resource_class: medium - parallelism: 2 - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - restore_cache: - name: Restore cached venv - keys: - - v1-pypi-py<< parameters.python-version >>-{{ checksum "requirements.txt" }} - - v1-pypi-py<< parameters.python-version >> - - - run: - name: Update & Activate venv - command: | - python -m venv env/ - . env/bin/activate - python -m pip install --upgrade pip - - - save_cache: - name: Save cached venv - paths: - - "env/" - key: v1-pypi-py<< parameters.python-version >>-{{ checksum "requirements.txt" }} - - - run: - name: Install Bittensor Subnet Template - command: | - . env/bin/activate - pip install -e . - - - store_test_results: - path: test-results - - store_artifacts: - path: test-results - - coveralls: - docker: - - image: cimg/python:3.10 - steps: - - run: - name: Combine Coverage - command: | - pip3 install --upgrade coveralls - coveralls --finish --rcfile .coveragerc || echo "Failed to upload coverage" - -workflows: - compatibility_checks: - jobs: - - check_compatibility: - python_version: "3.8" - name: check-compatibility-3.8 - - check_compatibility: - python_version: "3.9" - name: check-compatibility-3.9 - - check_compatibility: - python_version: "3.10" - name: check-compatibility-3.10 - - check_compatibility: - python_version: "3.11" - name: check-compatibility-3.11 - - pr-requirements: - jobs: - - black: - python-version: "3.8.12" - - pylint: - python-version: "3.8.12" - - build: - matrix: - parameters: - python-version: ["3.9.13", "3.10.6", "3.11.4"] From 5b4f73786d090ceb49df3a4bae05b693ec984536 Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 17:00:08 -0600 Subject: [PATCH 11/23] Only run tests in tests/ dir and only fail on error and failure --- .github/workflows/python-package.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index b0a00f12..2f1bd463 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -42,4 +42,5 @@ jobs: black . 
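    # Run the unit test suite only after the lint and format checks pass.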
- name: Test with pytest run: | - pytest + # run tests in tests/ dir and only fail if there are failures or errors + pytest tests/ --verbose --failed-first --exitfirst --disable-warnings From a55b446420a565aa3282b46e2876895b21e468eb Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 17:21:51 -0600 Subject: [PATCH 12/23] Adds task tests --- tests/test_tasks.py | 67 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 tests/test_tasks.py diff --git a/tests/test_tasks.py b/tests/test_tasks.py new file mode 100644 index 00000000..9475f8ab --- /dev/null +++ b/tests/test_tasks.py @@ -0,0 +1,67 @@ +import pytest +from prompting.tasks import Task, QuestionAnsweringTask, SummarizationTask, DebuggingTask, MathTask, DateQuestionAnsweringTask +from prompting.mock import MockPipeline + +""" +What we want to test for each task: +- The task is initialized correctly +- The task contains a query +- The task contains a reference answer +- Task contains a query_time +- Task contains a reference_time +- The task formats correctly +- All task fields are present as expected +- Tasks have reward definitions +""" + + +LLM_PIPELINE = MockPipeline("mock") +CONTEXT = {"text": "This is a context.", "title": "this is a title"} + +TASKS = [ + QuestionAnsweringTask, + SummarizationTask, + DebuggingTask, + MathTask, + DateQuestionAnsweringTask, + ] +CONTEXTS = { + QuestionAnsweringTask: {"text": "This is a context.", "title": "this is a title", "categories": ['some','categories']}, + SummarizationTask: {"text": "This is a context.", "title": "this is a title", "categories": ['some','categories']}, + DebuggingTask: {"code": "This is code","repo_name":'prompting',"path":'this/is/a/path', "language":'python'}, + MathTask: {"problem": "This is a problem","solution":'3.1415'}, + DateQuestionAnsweringTask: {"section": "Events", "event":'1066 - Battle of Hastings in UK', 'date':"1 January 2021"}, +} +# TODO: Math task only works when solution is floatable +# TODO: DateQA only accepts section in {Births, Deaths, Events} +# TODO: DateQA expect wiki entry for event + +@pytest.mark.parametrize('task', TASKS) +def test_create_task(task: Task): + context = CONTEXTS[task] + task(llm_pipeline=LLM_PIPELINE, context=context) + +@pytest.mark.parametrize('task', TASKS) +def test_task_contains_query(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + assert task.query is not None + +@pytest.mark.parametrize('task', TASKS) +def test_task_contains_reference(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + assert task.reference is not None + + +# @pytest.mark.parametrize('task', TASKS) +# def test_task_contains_query_time(task: Task): +# context = CONTEXTS[task] +# task = task(llm_pipeline=LLM_PIPELINE, context=context) +# assert task.reference_time>=0 + +# @pytest.mark.parametrize('task', TASKS) +# def test_task_contains_reference_time(task: Task): +# context = CONTEXTS[task] +# task = task(llm_pipeline=LLM_PIPELINE, context=context) +# assert task.query_time>=0 From ab222c39790dd8bfe4e199a1e057e78a2ee90262 Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 20:45:02 -0600 Subject: [PATCH 13/23] Change pip install flag to -e --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2f1bd463..7f86bd1f 100644 --- 
a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -28,7 +28,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest black - pip install -r requirements.txt + pip install -e . pip install -r prompting/requirements.txt - name: Lint with flake8 From 225747e72516acc84417cda4ea27b1d55ebc8e7b Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 03:59:00 +0000 Subject: [PATCH 14/23] Apply minmax scaling to rewards --- prompting/rewards/reward.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/prompting/rewards/reward.py b/prompting/rewards/reward.py index 3604ee30..c409b678 100644 --- a/prompting/rewards/reward.py +++ b/prompting/rewards/reward.py @@ -18,6 +18,7 @@ class RewardEvent: """Contains rewards for all the responses in a batch""" model_name: str rewards: torch.FloatTensor + rewards_normalized: torch.FloatTensor timings: torch.FloatTensor model_type: RewardModelTypeEnum batch_time: float @@ -27,6 +28,7 @@ class RewardEvent: def asdict(self) -> dict: return { f"{self.model_name}_raw_rewards": self.rewards.tolist(), + f"{self.model_name}_rewards": self.rewards_normalized.tolist(), f"{self.model_name}_timings": self.timings.tolist(), f"{self.model_name}_batch_time": self.batch_time, f"{self.model_name}_extra_info": self.extra_info, @@ -119,6 +121,12 @@ class BatchRewardOutput: rewards: torch.FloatTensor timings: torch.FloatTensor extra_info: dict + + def __post_init__(self): + if self.rewards.shape != self.timings.shape: + raise ValueError(f"rewards.shape {self.rewards.shape} != timings.shape {self.timings.shape}") + + self.rewards_normalized = (self.rewards-self.rewards.min())/(self.rewards.max()-self.rewards.min()) class BaseRewardModel(ABC): @@ -153,6 +161,7 @@ def apply(self, reference: str, response_event) -> RewardEvent: return RewardEvent( model_name=self.name, rewards=batch_rewards_output.rewards, + rewards_normalized=batch_rewards_output.rewards_normalized, model_type=self.model_type, batch_time=batch_rewards_time, extra_info=batch_rewards_output.extra_info, From 04e0a38ed9a2c1394f3669e1ef0f435c93e89675 Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 04:08:04 +0000 Subject: [PATCH 15/23] Fix logging and add try statement in math reward model --- prompting/forward.py | 2 -- prompting/rewards/float_diff.py | 23 +++++++++++++---------- prompting/utils/uids.py | 6 ++---- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/prompting/forward.py b/prompting/forward.py index 5a0664a6..2a9ba27d 100644 --- a/prompting/forward.py +++ b/prompting/forward.py @@ -95,8 +95,6 @@ async def run_step( **response_event.__state_dict__(), } - bt.logging.debug(f"Step complete. 
Event:\n{event}") - # BUG: Make sure everything we log is on CPU not GPU log_event(self, event) return event diff --git a/prompting/rewards/float_diff.py b/prompting/rewards/float_diff.py index 04de727b..cc0ed1c6 100644 --- a/prompting/rewards/float_diff.py +++ b/prompting/rewards/float_diff.py @@ -36,18 +36,21 @@ def math_score(reference, completion): if pred is None: return 0.0 - # Convert reference to float (this is okay because we already checked that the reference is a float) - # TODO: More flexible parsing of the reference (just as with the completion) - ref = float(reference) - if pred == ref: - return 1.0 + try: - # Compute the difference - diff = abs(ref - pred)/(ref + 1e-6) - # Make sure the difference is between 0 and 1 - diff = min(abs(diff), 1) + # Convert reference to float (this is okay because we already checked that the reference is a float) + # TODO: More flexible parsing of the reference (just as with the completion) + ref = float(reference) + if pred == ref: + return 1.0 + # Compute the difference + diff = abs(ref - pred)/(ref + 1e-6) + # Make sure the difference is between 0 and 1 + diff = min(abs(diff), 1) - return 1.0 - diff + return 1.0 - diff + except Exception: + return 0.0 def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput: diff --git a/prompting/utils/uids.py b/prompting/utils/uids.py index b729c1ac..574ddc31 100644 --- a/prompting/utils/uids.py +++ b/prompting/utils/uids.py @@ -20,10 +20,8 @@ def check_uid_availability( bt.logging.debug(f"uid: {uid} is not serving") return False # Filter validator permit > 1024 stake. - if metagraph.validator_permit[uid]: - bt.logging.debug(f"uid: {uid} has validator permit") - if metagraph.S[uid] > vpermit_tao_limit: - bt.logging.debug(f"uid: {uid} has stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") + if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit: + bt.logging.debug(f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") return False # Available otherwise. return True From ac2f2ead05cb8784c8c90533f0a316b43ff582d6 Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 04:10:33 +0000 Subject: [PATCH 16/23] Remove comment --- scripts/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 6366cff9..c0e348b8 100644 --- a/scripts/run.py +++ b/scripts/run.py @@ -1,6 +1,5 @@ import subprocess -# hi mom! coldkey = 'sn1-test' netuid = 61 network = 'test' From 5d085eb602ae8fcd77cda6bdc625a6cadae175fd Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 04:11:18 +0000 Subject: [PATCH 17/23] Fix indentation error --- prompting/utils/uids.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prompting/utils/uids.py b/prompting/utils/uids.py index 574ddc31..97ab103c 100644 --- a/prompting/utils/uids.py +++ b/prompting/utils/uids.py @@ -20,9 +20,9 @@ def check_uid_availability( bt.logging.debug(f"uid: {uid} is not serving") return False # Filter validator permit > 1024 stake. - if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit: - bt.logging.debug(f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") - return False + if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit: + bt.logging.debug(f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") + return False # Available otherwise. 
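    # Note: exclusion requires BOTH a validator permit AND stake above
    # vpermit_tao_limit; permit holders below the cap remain queryable.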
return True From 7524531c004b0d2a0bde4e7552e73400069571d8 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Fri, 19 Jan 2024 16:48:24 +0000 Subject: [PATCH 18/23] Add tests --- tests/test_agent.py | 82 ++++++++++++++++++++++++++ tests/test_dataset.py | 27 +++++++++ tests/test_dataset_task_integration.py | 50 ++++++++++++++++ tests/test_persona.py | 14 +++++ tests/test_tasks.py | 20 ++++++- 5 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 tests/test_agent.py create mode 100644 tests/test_dataset.py create mode 100644 tests/test_dataset_task_integration.py create mode 100644 tests/test_persona.py diff --git a/tests/test_agent.py b/tests/test_agent.py new file mode 100644 index 00000000..da783a50 --- /dev/null +++ b/tests/test_agent.py @@ -0,0 +1,82 @@ +import pytest +from prompting.agent import Persona +from prompting.agent import HumanAgent +from prompting.tasks import Task, QuestionAnsweringTask, SummarizationTask, DebuggingTask, MathTask, DateQuestionAnsweringTask +from prompting.tools import MockDataset, CodingDataset, WikiDataset, StackOverflowDataset, DateQADataset, MathDataset +from prompting.mock import MockPipeline + +""" +Things to test: + - Agent is initialized correctly + - Agent contains a persona + - Agent contains a task + - Agent can make queries + - Agent can make responses + + - Persona is initialized correctly + - Persona contains a mood + - Persona contains a tone + - Persona contains a topic + - Persona contains a subject + - Persona contains a description + - Persona contains a goal + - Persona contains a query + + - Task is initialized correctly + - Task contains a query + - Task contains a reference + - Task contains a context + - Task contains a complete flag + + +""" +TASKS = [ + QuestionAnsweringTask, + SummarizationTask, + DebuggingTask, + MathTask, + DateQuestionAnsweringTask, + ] +LLM_PIPELINE = MockPipeline("mock") +CONTEXTS = { + QuestionAnsweringTask: WikiDataset().next(), + SummarizationTask: WikiDataset().next(), + DebuggingTask: CodingDataset().next(), + MathTask: MathDataset().next(), + DateQuestionAnsweringTask: DateQADataset().next(), +} + +@pytest.mark.parametrize('task', TASKS) +def test_agent_creation_with_dataset_context(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True) + assert agent is not None + +@pytest.mark.parametrize('task', TASKS) +def test_agent_contains_persona(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True) + assert agent.persona is not None + +@pytest.mark.parametrize('task', TASKS) +def test_agent_contains_task(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True) + assert agent.task is not None + +@pytest.mark.parametrize('task', TASKS) +def test_agent_can_make_queries(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True) + assert agent.query is not None + +@pytest.mark.parametrize('task', TASKS) +def test_agent_can_make_challenges(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task) + assert 
agent.challenge is not None diff --git a/tests/test_dataset.py b/tests/test_dataset.py new file mode 100644 index 00000000..8e1802e2 --- /dev/null +++ b/tests/test_dataset.py @@ -0,0 +1,27 @@ +import pytest + +from prompting.tools import MockDataset, CodingDataset, WikiDataset, StackOverflowDataset, DateQADataset, MathDataset + + + + +DATASETS = [ + MockDataset, + CodingDataset, + WikiDataset, + StackOverflowDataset, + DateQADataset, + MathDataset, +] + + +@pytest.mark.parametrize('dataset', DATASETS) +def test_create_task(dataset): + data = dataset() + assert data is not None + + +@pytest.mark.parametrize('dataset', DATASETS) +def test_create_task(dataset): + data = dataset() + assert data.next() is not None \ No newline at end of file diff --git a/tests/test_dataset_task_integration.py b/tests/test_dataset_task_integration.py new file mode 100644 index 00000000..24f12726 --- /dev/null +++ b/tests/test_dataset_task_integration.py @@ -0,0 +1,50 @@ +import pytest +from prompting.tasks import Task, QuestionAnsweringTask, SummarizationTask, DebuggingTask, MathTask, DateQuestionAnsweringTask +from prompting.tools import MockDataset, CodingDataset, WikiDataset, StackOverflowDataset, DateQADataset, MathDataset +from prompting.mock import MockPipeline + + +""" +What we want: + +- The task is initialized correctly using dataset +- The task contains a query using dataset +- The task contains a reference answer using dataset +""" + + +TASKS = [ + QuestionAnsweringTask, + SummarizationTask, + DebuggingTask, + MathTask, + DateQuestionAnsweringTask, + ] +CONTEXTS = { + QuestionAnsweringTask: WikiDataset().next(), + SummarizationTask: WikiDataset().next(), + DebuggingTask: CodingDataset().next(), + MathTask: MathDataset().next(), + DateQuestionAnsweringTask: DateQADataset().next(), +} + +LLM_PIPELINE = MockPipeline("mock") + +@pytest.mark.parametrize('task', TASKS) +def test_task_creation_with_dataset_context(task: Task): + context = CONTEXTS[task] + task(llm_pipeline=LLM_PIPELINE, context=context) + assert task is not None + +@pytest.mark.parametrize('task', TASKS) +def test_task_contains_query(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + assert task.query is not None + +@pytest.mark.parametrize('task', TASKS) +def test_task_contains_reference(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + assert task.reference is not None + diff --git a/tests/test_persona.py b/tests/test_persona.py new file mode 100644 index 00000000..4f3097a5 --- /dev/null +++ b/tests/test_persona.py @@ -0,0 +1,14 @@ +import pytest +from prompting.persona import Persona, create_persona + +def test_persona_initialization(): + assert create_persona() is not None + +def test_persona_contains_mood(): + assert create_persona().mood is not None + +def test_persona_contains_tone(): + assert create_persona().tone is not None + +def test_persona_contains_profile(): + assert create_persona().profile is not None \ No newline at end of file diff --git a/tests/test_tasks.py b/tests/test_tasks.py index 9475f8ab..1a5df0ea 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -30,8 +30,9 @@ SummarizationTask: {"text": "This is a context.", "title": "this is a title", "categories": ['some','categories']}, DebuggingTask: {"code": "This is code","repo_name":'prompting',"path":'this/is/a/path', "language":'python'}, MathTask: {"problem": "This is a problem","solution":'3.1415'}, - DateQuestionAnsweringTask: {"section": "Events", 
"event":'1066 - Battle of Hastings in UK', 'date':"1 January 2021"}, + DateQuestionAnsweringTask: {"section": "Events", "event":"1953 - Battle of Hastings in UK", 'date':"1 January"}, } + # TODO: Math task only works when solution is floatable # TODO: DateQA only accepts section in {Births, Deaths, Events} # TODO: DateQA expect wiki entry for event @@ -53,6 +54,23 @@ def test_task_contains_reference(task: Task): task = task(llm_pipeline=LLM_PIPELINE, context=context) assert task.reference is not None +# @pytest.mark.parametrize('task', TASKS) +# def test_task_contains_reward_definition(task: Task): +# context = CONTEXTS[task] +# task = task(llm_pipeline=LLM_PIPELINE, context=context) +# assert task.reward_definition is not None + +# @pytest.mark.parametrize('task', TASKS) +# def test_task_contains_goal(task: Task): +# context = CONTEXTS[task] +# task = task(llm_pipeline=LLM_PIPELINE, context=context) +# assert task.goal is not None + +# @pytest.mark.parametrize('task', TASKS) +# def test_task_contains_desc(task: Task): +# context = CONTEXTS[task] +# task = task(llm_pipeline=LLM_PIPELINE, context=context) +# assert task.desc is not None # @pytest.mark.parametrize('task', TASKS) # def test_task_contains_query_time(task: Task): From 7086c7b1282c12169263206050bf64641dc231aa Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 19:03:40 +0000 Subject: [PATCH 19/23] Package install, run only tests in tests/ and remove circleci --- .circleci/config.yml | 168 --------------------------- .github/workflows/python-package.yml | 4 +- tests/test_template_validator.py | 112 ------------------ 3 files changed, 2 insertions(+), 282 deletions(-) delete mode 100644 .circleci/config.yml delete mode 100644 tests/test_template_validator.py diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 0473afe6..00000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,168 +0,0 @@ -version: 2.1 - -orbs: - python: circleci/python@2.1.1 - python-lib: dialogue/python-lib@0.1.55 - # coveralls: coveralls/coveralls@1.0.6 - -jobs: - black: - resource_class: small - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - restore_cache: - name: Restore cached black venv - keys: - - v1-pypi-py-black-<< parameters.python-version >> - - - run: - name: Update & Activate black venv - command: | - python -m venv env/ - . env/bin/activate - python -m pip install --upgrade pip - pip install black - - - save_cache: - name: Save cached black venv - paths: - - "env/" - key: v1-pypi-py-black-<< parameters.python-version >> - - - run: - name: Black format check - command: | - . env/bin/activate - black --line-length 79 --exclude '(env|venv|.eggs)' --check . - - pylint: - resource_class: small - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - run: - name: Install Pylint - command: | - python -m venv env/ - . env/bin/activate - pip install pylint - - - run: - name: Pylint check - command: | - . 
env/bin/activate - pylint --fail-on=W,E,F --exit-zero ./ - - check_compatibility: - parameters: - python_version: - type: string - docker: - - image: cimg/python:3.10 - steps: - - checkout - - run: - name: Check if requirements files have changed - command: ./scripts/check_requirements_changes.sh - - run: - name: Install dependencies and Check compatibility - command: | - if [ "$REQUIREMENTS_CHANGED" == "true" ]; then - sudo apt-get update - sudo apt-get install -y jq curl - ./scripts/check_compatibility.sh << parameters.python_version >> - else - echo "Skipping compatibility checks..." - fi - - build: - resource_class: medium - parallelism: 2 - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - restore_cache: - name: Restore cached venv - keys: - - v1-pypi-py<< parameters.python-version >>-{{ checksum "requirements.txt" }} - - v1-pypi-py<< parameters.python-version >> - - - run: - name: Update & Activate venv - command: | - python -m venv env/ - . env/bin/activate - python -m pip install --upgrade pip - - - save_cache: - name: Save cached venv - paths: - - "env/" - key: v1-pypi-py<< parameters.python-version >>-{{ checksum "requirements.txt" }} - - - run: - name: Install Bittensor Subnet Template - command: | - . env/bin/activate - pip install -e . - - - store_test_results: - path: test-results - - store_artifacts: - path: test-results - - coveralls: - docker: - - image: cimg/python:3.10 - steps: - - run: - name: Combine Coverage - command: | - pip3 install --upgrade coveralls - coveralls --finish --rcfile .coveragerc || echo "Failed to upload coverage" - -workflows: - compatibility_checks: - jobs: - - check_compatibility: - python_version: "3.8" - name: check-compatibility-3.8 - - check_compatibility: - python_version: "3.9" - name: check-compatibility-3.9 - - check_compatibility: - python_version: "3.10" - name: check-compatibility-3.10 - - check_compatibility: - python_version: "3.11" - name: check-compatibility-3.11 - - pr-requirements: - jobs: - - black: - python-version: "3.8.12" - - pylint: - python-version: "3.8.12" - - build: - matrix: - parameters: - python-version: ["3.9.13", "3.10.6", "3.11.4"] diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index b0a00f12..2ff1211e 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -28,7 +28,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest black - pip install -r requirements.txt + pip install -e . pip install -r prompting/requirements.txt - name: Lint with flake8 @@ -42,4 +42,4 @@ jobs: black . 
- name: Test with pytest run: | - pytest + pytest tests/ diff --git a/tests/test_template_validator.py b/tests/test_template_validator.py deleted file mode 100644 index 2ce7ac2c..00000000 --- a/tests/test_template_validator.py +++ /dev/null @@ -1,112 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# Copyright © 2023 Opentensor Foundation - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import sys -import torch -import unittest -import bittensor as bt - -from neurons.validator import Neuron as Validator -from neurons.miner import Neuron as Miner - -from prompting.protocol import Dummy -from prompting.validator.forward import forward -from prompting.utils.uids import get_random_uids -from prompting.validator.reward import get_rewards -from prompting.base.validator import BaseValidatorNeuron - - -class TemplateValidatorNeuronTestCase(unittest.TestCase): - """ - This class contains unit tests for the RewardEvent classes. - - The tests cover different scenarios where completions may or may not be successful and the reward events are checked that they don't contain missing values. - The `reward` attribute of all RewardEvents is expected to be a float, and the `is_filter_model` attribute is expected to be a boolean. - """ - - def setUp(self): - sys.argv = sys.argv[0] + ["--config", "tests/configs/validator.json"] - - config = BaseValidatorNeuron.config() - config.wallet._mock = True - config.metagraph._mock = True - config.subtensor._mock = True - self.neuron = Validator(config) - self.miner_uids = get_random_uids(self, k=10) - - def test_run_single_step(self): - # TODO: Test a single step - pass - - def test_sync_error_if_not_registered(self): - # TODO: Test that the validator throws an error if it is not registered on metagraph - pass - - def test_forward(self): - # TODO: Test that the forward function returns the correct value - pass - - def test_dummy_responses(self): - # TODO: Test that the dummy responses are correctly constructed - - responses = self.neuron.dendrite.query( - # Send the query to miners in the network. - axons=[self.neuron.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. 
- deserialize=True, - ) - - for i, response in enumerate(responses): - self.assertEqual(response, self.neuron.step * 2) - - def test_reward(self): - # TODO: Test that the reward function returns the correct value - responses = self.dendrite.query( - # Send the query to miners in the network. - axons=[self.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. - deserialize=True, - ) - - rewards = get_rewards(self.neuron, responses) - expected_rewards = torch.FloatTensor([1.0] * len(responses)) - self.assertEqual(rewards, expected_rewards) - - def test_reward_with_nan(self): - # TODO: Test that NaN rewards are correctly sanitized - # TODO: Test that a bt.logging.warning is thrown when a NaN reward is sanitized - responses = self.dendrite.query( - # Send the query to miners in the network. - axons=[self.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. - deserialize=True, - ) - - rewards = get_rewards(self.neuron, responses) - expected_rewards = rewards.clone() - # Add NaN values to rewards - rewards[0] = float("nan") - - with self.assertLogs(bt.logging, level="WARNING") as cm: - self.neuron.update_scores(rewards, self.miner_uids) From e74dd581920111bde992afb616f5df080cc7874d Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 19:10:18 +0000 Subject: [PATCH 20/23] Add custom test conditions --- .github/workflows/python-package.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2ff1211e..36058317 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -42,4 +42,5 @@ jobs: black . 
- name: Test with pytest run: | - pytest tests/ + # run tests in tests/ dir and only fail if there are failures or errors + pytest tests/ --verbose --failed-first --exitfirst --disable-warnings \ No newline at end of file From a9c945e9f1cd803566b01f0978033217bc823cf7 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Fri, 19 Jan 2024 19:18:42 +0000 Subject: [PATCH 21/23] Remove undeployed tasks --- tests/test_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_agent.py b/tests/test_agent.py index da783a50..192657e2 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -33,8 +33,8 @@ TASKS = [ QuestionAnsweringTask, SummarizationTask, - DebuggingTask, - MathTask, + #DebuggingTask, + #MathTask, DateQuestionAnsweringTask, ] LLM_PIPELINE = MockPipeline("mock") From f3d2c848176651c36db9fa15ff43cdfca378664f Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Fri, 19 Jan 2024 14:35:31 -0500 Subject: [PATCH 22/23] updates default wandb of validators --- prompting/utils/config.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/prompting/utils/config.py b/prompting/utils/config.py index df51b873..ae9fba48 100644 --- a/prompting/utils/config.py +++ b/prompting/utils/config.py @@ -287,7 +287,14 @@ def add_validator_args(cls, parser): "--wandb.project_name", type=str, help="The name of the project where you are sending the new run.", - default="synapse_agent_experiments", + default="alpha-validators", + ) + + parser.add_argument( + "--wandb.entity", + type=str, + help="The name of the project where you are sending the new run.", + default="opentensor-dev", ) From e20e03bb61c01474b80ade29680e2af7b9aa8b21 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Fri, 19 Jan 2024 19:42:39 +0000 Subject: [PATCH 23/23] Remove math Task from pytest --- tests/test_dataset_task_integration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_dataset_task_integration.py b/tests/test_dataset_task_integration.py index 24f12726..7df3c569 100644 --- a/tests/test_dataset_task_integration.py +++ b/tests/test_dataset_task_integration.py @@ -16,8 +16,8 @@ TASKS = [ QuestionAnsweringTask, SummarizationTask, - DebuggingTask, - MathTask, + #DebuggingTask, + #MathTask, DateQuestionAnsweringTask, ] CONTEXTS = {
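
For reference, the min-max scaling introduced in PATCH 14 can be exercised in isolation. The sketch below reproduces the `__post_init__` logic using the field names from the diff; the `extra_info` field and the surrounding `RewardEvent` plumbing are omitted, and the example values are invented:

```python
import torch
from dataclasses import dataclass


@dataclass
class BatchRewardOutput:
    """Minimal stand-in for prompting.rewards.reward.BatchRewardOutput."""

    rewards: torch.FloatTensor
    timings: torch.FloatTensor

    def __post_init__(self):
        if self.rewards.shape != self.timings.shape:
            raise ValueError(
                f"rewards.shape {self.rewards.shape} != timings.shape {self.timings.shape}"
            )
        # Min-max scale the batch into [0, 1]. As written in the patch, this
        # divides by zero when all rewards in a batch are identical (max == min).
        self.rewards_normalized = (self.rewards - self.rewards.min()) / (
            self.rewards.max() - self.rewards.min()
        )


output = BatchRewardOutput(
    rewards=torch.tensor([0.2, 0.5, 0.9]),
    timings=torch.tensor([0.01, 0.02, 0.01]),
)
print(output.rewards_normalized)  # tensor([0.0000, 0.4286, 1.0000])
```

Because normalization is per batch, a completion's normalized reward reflects its rank relative to the other miners queried in the same step rather than an absolute quality score.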