From 4045687dfe673b9770db1f2764e3c226251415ec Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Mon, 15 Jan 2024 21:07:31 -0600 Subject: [PATCH 01/23] Adds config to select only uids with unique coldkeys/ips to prevent network caching --- prompting/utils/config.py | 15 +++++++++++- prompting/utils/uids.py | 39 +++++++++++++++++++++---------- tests/test_uids.py | 48 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 13 deletions(-) create mode 100644 tests/test_uids.py diff --git a/prompting/utils/config.py b/prompting/utils/config.py index 0c5c243b..4a19142d 100644 --- a/prompting/utils/config.py +++ b/prompting/utils/config.py @@ -97,7 +97,7 @@ def add_args(cls, parser): help="If set, we dont save events to a log file.", default=False, ) - + parser.add_argument( "--neuron.log_full", action="store_true", @@ -288,6 +288,19 @@ def add_validator_args(cls, parser): default=4096, ) + parser.add_argument( + "--neuron.query_unique_coldkeys", + action="store_true", + help="Only query a single hotkey per coldkey.", + default=False, + ) + + parser.add_argument( + "--neuron.query_unique_ips", + action="store_true", + help="Only query a single hotkey per ip.", + default=False, + ) def config(cls): """ diff --git a/prompting/utils/uids.py b/prompting/utils/uids.py index e007dffd..057a825a 100644 --- a/prompting/utils/uids.py +++ b/prompting/utils/uids.py @@ -5,13 +5,15 @@ def check_uid_availability( - metagraph: "bt.metagraph.Metagraph", uid: int, vpermit_tao_limit: int + metagraph: "bt.metagraph.Metagraph", uid: int, vpermit_tao_limit: int, coldkeys: set = None, ips: set = None, ) -> bool: """Check if uid is available. The UID should be available if it is serving and has less than vpermit_tao_limit stake Args: metagraph (:obj: bt.metagraph.Metagraph): Metagraph object uid (int): uid to be checked vpermit_tao_limit (int): Validator permit tao limit + coldkeys (set): Set of coldkeys to exclude + ips (set): Set of ips to exclude Returns: bool: True if uid is available, False otherwise """ @@ -20,11 +22,16 @@ def check_uid_availability( bt.logging.warning(f"uid: {uid} is not serving") return False # Filter validator permit > 1024 stake. - if metagraph.validator_permit[uid]: - bt.logging.warning(f"uid: {uid} has validator permit") - if metagraph.S[uid] > vpermit_tao_limit: - bt.logging.warning(f"uid: {uid} has stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") + if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit: + bt.logging.warning(f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") return False + + if coldkeys and metagraph.axons[uid].coldkey in coldkeys: + return False + + if ips and metagraph.axons[uid].ip in ips: + return False + # Available otherwise. 
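    # Reaching this point means every filter passed: the axon is serving, any
    # validator permit is below the vpermit_tao_limit stake cap, and the uid's
    # coldkey/ip does not collide with the caller-supplied exclusion sets.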
return True @@ -43,19 +50,27 @@ def get_random_uids( """ candidate_uids = [] avail_uids = [] - + coldkeys = set() + ips = set() for uid in range(self.metagraph.n.item()): if uid == self.uid: continue + uid_is_available = check_uid_availability( - self.metagraph, uid, self.config.neuron.vpermit_tao_limit + self.metagraph, uid, self.config.neuron.vpermit_tao_limit, coldkeys, ips, ) - uid_is_not_excluded = exclude is None or uid not in exclude + if not uid_is_available: + continue + + if self.config.neuron.query_unique_coldkeys: + coldkeys.add(self.metagraph.axons[uid].coldkey) + + if self.config.neuron.query_unique_ips: + ips.add(self.metagraph.axons[uid].ip) - if uid_is_available: - avail_uids.append(uid) - if uid_is_not_excluded: - candidate_uids.append(uid) + avail_uids.append(uid) + if exclude is None or uid not in exclude: + candidate_uids.append(uid) # Check if candidate_uids contain enough for querying, if not grab all avaliable uids available_uids = candidate_uids diff --git a/tests/test_uids.py b/tests/test_uids.py new file mode 100644 index 00000000..79e5b66f --- /dev/null +++ b/tests/test_uids.py @@ -0,0 +1,48 @@ + +import torch +import pytest +from types import SimpleNamespace +from prompting.utils.uids import get_random_uids + + +def make_mock_neuron(unique_coldkeys=False, unique_ips=False, vpermit_tao_limit=1000): + + axons = [ + SimpleNamespace(coldkey="a", ip="0.0.0.1", is_serving=True), + SimpleNamespace(coldkey="a", ip="0.0.0.0", is_serving=True), + SimpleNamespace(coldkey="b", ip="0.0.0.1", is_serving=True), + SimpleNamespace(coldkey="b", ip="0.0.0.0", is_serving=True), + SimpleNamespace(coldkey="c", ip="0.0.0.2", is_serving=True), + ] + metagraph = SimpleNamespace( + axons = axons, + validator_permit = torch.ones(len(axons), dtype=torch.bool), + S = torch.zeros(len(axons)), + n = torch.tensor(len(axons)) + ) + + return SimpleNamespace( + uid = 4, + config = SimpleNamespace( + neuron = SimpleNamespace( + vpermit_tao_limit = vpermit_tao_limit, + query_unique_coldkeys = unique_coldkeys, + query_unique_ips = unique_ips, + ) + ), + metagraph = metagraph + ) + +@pytest.mark.parametrize( + "unique_coldkeys, unique_ips, k, expected_result", [ + (False, False, 4, [0, 1, 2, 3]), + (True, False, 2, [0, 2]), + (False, True, 2, [0, 1]), + (True, True, 2, [0, 3]) + ]) +def test_check_uid_availability(unique_coldkeys, unique_ips, k, expected_result): + + mock_neuron = make_mock_neuron(unique_coldkeys, unique_ips) + + assert sorted(get_random_uids(mock_neuron, k).tolist()) == expected_result, "Incorrect uids returned." + From 839f5ed9969c4d21fcb447ffe5345cb8f47961e3 Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Mon, 15 Jan 2024 21:16:55 -0600 Subject: [PATCH 02/23] Rename test --- tests/test_uids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_uids.py b/tests/test_uids.py index 79e5b66f..18bcd22d 100644 --- a/tests/test_uids.py +++ b/tests/test_uids.py @@ -40,7 +40,7 @@ def make_mock_neuron(unique_coldkeys=False, unique_ips=False, vpermit_tao_limit= (False, True, 2, [0, 1]), (True, True, 2, [0, 3]) ]) -def test_check_uid_availability(unique_coldkeys, unique_ips, k, expected_result): +def test_get_random_uids(unique_coldkeys, unique_ips, k, expected_result): mock_neuron = make_mock_neuron(unique_coldkeys, unique_ips) From 356c633d64431fa61ca6d613fc38973344ab1da9 Mon Sep 17 00:00:00 2001 From: p-ferreira Date: Wed, 17 Jan 2024 16:46:28 +0000 Subject: [PATCH 03/23] Hey.... 
--- scripts/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/run.py b/scripts/run.py index d711223c..d35f5b3d 100644 --- a/scripts/run.py +++ b/scripts/run.py @@ -1,5 +1,6 @@ import subprocess +# hi mom! coldkey = 'sn1-test' netuid = 61 network = 'test' From a8c2c1e4ae9a68e86c1e71394e16ae89af32b22e Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 15:28:39 -0600 Subject: [PATCH 04/23] Set relevance to zero when completion is empty, and lower the logging level of save state to debug --- prompting/base/neuron.py | 4 ++-- prompting/rewards/relevance.py | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/prompting/base/neuron.py b/prompting/base/neuron.py index 2c85ce08..002f7a44 100644 --- a/prompting/base/neuron.py +++ b/prompting/base/neuron.py @@ -165,11 +165,11 @@ def should_set_weights(self) -> bool: ) > self.config.neuron.epoch_length def save_state(self): - bt.logging.warning( + bt.logging.debug( "save_state() not implemented for this neuron. You can implement this function to save model checkpoints or other useful data." ) def load_state(self): - bt.logging.warning( + bt.logging.debug( "load_state() not implemented for this neuron. You can implement this function to load model checkpoints or other useful data." ) diff --git a/prompting/rewards/relevance.py b/prompting/rewards/relevance.py index 072ae72b..f327f636 100644 --- a/prompting/rewards/relevance.py +++ b/prompting/rewards/relevance.py @@ -32,13 +32,17 @@ def reward( self, reference: str, completions: List[str] ) -> BatchRewardOutput: reference_embedding = self.model.encode(reference, to_numpy=False) - completions_embeddings = self.model.encode(completions, to_numpy=False) rewards = [] timings = [] - for emb in completions_embeddings: + for comp in completions: t0 = time.time() - rewards.append(cosine_similarity(reference_embedding.reshape(1, -1), emb.reshape(1, -1))) + score = 0 + if comp: + emb = self.model.encode(completions, to_numpy=False) + score = cosine_similarity(reference_embedding.reshape(1, -1), emb.reshape(1, -1)) + + rewards.append(score) timings.append(time.time() - t0) output = BatchRewardOutput( From 878a181f4576cf0d569d31711a68f98f0caffcd9 Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 15:37:08 -0600 Subject: [PATCH 05/23] Fix bug where relevance was calculated w.r.t. all completions instead of a specific completion --- prompting/rewards/relevance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompting/rewards/relevance.py b/prompting/rewards/relevance.py index f327f636..4fa01484 100644 --- a/prompting/rewards/relevance.py +++ b/prompting/rewards/relevance.py @@ -39,7 +39,7 @@ def reward( t0 = time.time() score = 0 if comp: - emb = self.model.encode(completions, to_numpy=False) + emb = self.model.encode(comp, to_numpy=False) score = cosine_similarity(reference_embedding.reshape(1, -1), emb.reshape(1, -1)) rewards.append(score) From 6988219845964182480656a4e9259cd0aabd9007 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Thu, 18 Jan 2024 21:49:29 +0000 Subject: [PATCH 06/23] Add prompting/requirements to pip install --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1fde95b2..b36eefc6 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ This repository requires python3.8 or higher. 
To install, simply clone this repo ```bash git clone https://github.com/opentensor/prompting.git cd prompting -python -m pip install -r requirements.txt +python -m pip install -r requirements.txt -r prompting/requirements.txt python -m pip install -e . ``` @@ -107,7 +107,7 @@ Prior to running a miner or validator, you must [create a wallet](https://github The validator and base miner are based on [zephyr](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta), which is a fine-tuned Mistral-7B. -**To run a validator or zephyr miner you will need 40GB VRAM (we're working on bringing this down to 32).** +**To run a validator or zephyr miner you will need 20GB VRAM** ```bash # To run the validator python neurons/validator.py From afe4a92da23414a43d768591e28fcdee0854ad73 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Thu, 18 Jan 2024 21:49:44 +0000 Subject: [PATCH 07/23] Fix docstrings --- neurons/miners/zephyr/miner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neurons/miners/zephyr/miner.py b/neurons/miners/zephyr/miner.py index 76c9277d..2d75aa8a 100644 --- a/neurons/miners/zephyr/miner.py +++ b/neurons/miners/zephyr/miner.py @@ -35,7 +35,7 @@ class ZephyrMiner(Miner): """ Base miner which runs zephyr (https://huggingface.co/HuggingFaceH4/zephyr-7b-beta) - This requires a GPU with at least 40GB of memory. + This requires a GPU with at least 20GB of memory. To run this miner from the project root directory: @@ -45,7 +45,7 @@ class ZephyrMiner(Miner): @classmethod def add_args(cls, parser: argparse.ArgumentParser): """ - Adds OpenAI-specific arguments to the command line parser. + Adds arguments to the command line parser. """ super().add_args(parser) parser.add_argument( From 3192dcc3e6353c87ba1bf4b53ed32f15c4705d8d Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Thu, 18 Jan 2024 22:08:45 +0000 Subject: [PATCH 08/23] Fix saying only 20 GB VRAM needed --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b36eefc6..d28e41fd 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ Prior to running a miner or validator, you must [create a wallet](https://github The validator and base miner are based on [zephyr](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta), which is a fine-tuned Mistral-7B. 
-**To run a validator or zephyr miner you will need 20GB VRAM** +**To run a validator you will need 24GB of VRAM or 18GB of VRAM for a zephyr miner** ```bash # To run the validator python neurons/validator.py From 7da51d3bbae973244c7d9272be584af326451a0b Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 16:17:23 -0600 Subject: [PATCH 09/23] Remove template test --- tests/test_template_validator.py | 112 ------------------------------- 1 file changed, 112 deletions(-) delete mode 100644 tests/test_template_validator.py diff --git a/tests/test_template_validator.py b/tests/test_template_validator.py deleted file mode 100644 index 2ce7ac2c..00000000 --- a/tests/test_template_validator.py +++ /dev/null @@ -1,112 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# Copyright © 2023 Opentensor Foundation - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import sys -import torch -import unittest -import bittensor as bt - -from neurons.validator import Neuron as Validator -from neurons.miner import Neuron as Miner - -from prompting.protocol import Dummy -from prompting.validator.forward import forward -from prompting.utils.uids import get_random_uids -from prompting.validator.reward import get_rewards -from prompting.base.validator import BaseValidatorNeuron - - -class TemplateValidatorNeuronTestCase(unittest.TestCase): - """ - This class contains unit tests for the RewardEvent classes. - - The tests cover different scenarios where completions may or may not be successful and the reward events are checked that they don't contain missing values. - The `reward` attribute of all RewardEvents is expected to be a float, and the `is_filter_model` attribute is expected to be a boolean. 
- """ - - def setUp(self): - sys.argv = sys.argv[0] + ["--config", "tests/configs/validator.json"] - - config = BaseValidatorNeuron.config() - config.wallet._mock = True - config.metagraph._mock = True - config.subtensor._mock = True - self.neuron = Validator(config) - self.miner_uids = get_random_uids(self, k=10) - - def test_run_single_step(self): - # TODO: Test a single step - pass - - def test_sync_error_if_not_registered(self): - # TODO: Test that the validator throws an error if it is not registered on metagraph - pass - - def test_forward(self): - # TODO: Test that the forward function returns the correct value - pass - - def test_dummy_responses(self): - # TODO: Test that the dummy responses are correctly constructed - - responses = self.neuron.dendrite.query( - # Send the query to miners in the network. - axons=[self.neuron.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. - deserialize=True, - ) - - for i, response in enumerate(responses): - self.assertEqual(response, self.neuron.step * 2) - - def test_reward(self): - # TODO: Test that the reward function returns the correct value - responses = self.dendrite.query( - # Send the query to miners in the network. - axons=[self.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. - deserialize=True, - ) - - rewards = get_rewards(self.neuron, responses) - expected_rewards = torch.FloatTensor([1.0] * len(responses)) - self.assertEqual(rewards, expected_rewards) - - def test_reward_with_nan(self): - # TODO: Test that NaN rewards are correctly sanitized - # TODO: Test that a bt.logging.warning is thrown when a NaN reward is sanitized - responses = self.dendrite.query( - # Send the query to miners in the network. - axons=[self.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. - deserialize=True, - ) - - rewards = get_rewards(self.neuron, responses) - expected_rewards = rewards.clone() - # Add NaN values to rewards - rewards[0] = float("nan") - - with self.assertLogs(bt.logging, level="WARNING") as cm: - self.neuron.update_scores(rewards, self.miner_uids) From 16a398894223bdfc9fc4f61d62e72347981bb956 Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 16:59:02 -0600 Subject: [PATCH 10/23] Remove circle CI --- .circleci/config.yml | 168 ------------------------------------------- 1 file changed, 168 deletions(-) delete mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 0473afe6..00000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,168 +0,0 @@ -version: 2.1 - -orbs: - python: circleci/python@2.1.1 - python-lib: dialogue/python-lib@0.1.55 - # coveralls: coveralls/coveralls@1.0.6 - -jobs: - black: - resource_class: small - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - restore_cache: - name: Restore cached black venv - keys: - - v1-pypi-py-black-<< parameters.python-version >> - - - run: - name: Update & Activate black venv - command: | - python -m venv env/ - . 
env/bin/activate - python -m pip install --upgrade pip - pip install black - - - save_cache: - name: Save cached black venv - paths: - - "env/" - key: v1-pypi-py-black-<< parameters.python-version >> - - - run: - name: Black format check - command: | - . env/bin/activate - black --line-length 79 --exclude '(env|venv|.eggs)' --check . - - pylint: - resource_class: small - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - run: - name: Install Pylint - command: | - python -m venv env/ - . env/bin/activate - pip install pylint - - - run: - name: Pylint check - command: | - . env/bin/activate - pylint --fail-on=W,E,F --exit-zero ./ - - check_compatibility: - parameters: - python_version: - type: string - docker: - - image: cimg/python:3.10 - steps: - - checkout - - run: - name: Check if requirements files have changed - command: ./scripts/check_requirements_changes.sh - - run: - name: Install dependencies and Check compatibility - command: | - if [ "$REQUIREMENTS_CHANGED" == "true" ]; then - sudo apt-get update - sudo apt-get install -y jq curl - ./scripts/check_compatibility.sh << parameters.python_version >> - else - echo "Skipping compatibility checks..." - fi - - build: - resource_class: medium - parallelism: 2 - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - restore_cache: - name: Restore cached venv - keys: - - v1-pypi-py<< parameters.python-version >>-{{ checksum "requirements.txt" }} - - v1-pypi-py<< parameters.python-version >> - - - run: - name: Update & Activate venv - command: | - python -m venv env/ - . env/bin/activate - python -m pip install --upgrade pip - - - save_cache: - name: Save cached venv - paths: - - "env/" - key: v1-pypi-py<< parameters.python-version >>-{{ checksum "requirements.txt" }} - - - run: - name: Install Bittensor Subnet Template - command: | - . env/bin/activate - pip install -e . - - - store_test_results: - path: test-results - - store_artifacts: - path: test-results - - coveralls: - docker: - - image: cimg/python:3.10 - steps: - - run: - name: Combine Coverage - command: | - pip3 install --upgrade coveralls - coveralls --finish --rcfile .coveragerc || echo "Failed to upload coverage" - -workflows: - compatibility_checks: - jobs: - - check_compatibility: - python_version: "3.8" - name: check-compatibility-3.8 - - check_compatibility: - python_version: "3.9" - name: check-compatibility-3.9 - - check_compatibility: - python_version: "3.10" - name: check-compatibility-3.10 - - check_compatibility: - python_version: "3.11" - name: check-compatibility-3.11 - - pr-requirements: - jobs: - - black: - python-version: "3.8.12" - - pylint: - python-version: "3.8.12" - - build: - matrix: - parameters: - python-version: ["3.9.13", "3.10.6", "3.11.4"] From 5b4f73786d090ceb49df3a4bae05b693ec984536 Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 17:00:08 -0600 Subject: [PATCH 11/23] Only run tests in tests/ dir and only fail on error and failure --- .github/workflows/python-package.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index b0a00f12..2f1bd463 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -42,4 +42,5 @@ jobs: black . 
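    # Run the unit test suite only after the lint and format checks pass.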
- name: Test with pytest run: | - pytest + # run tests in tests/ dir and only fail if there are failures or errors + pytest tests/ --verbose --failed-first --exitfirst --disable-warnings From a55b446420a565aa3282b46e2876895b21e468eb Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 17:21:51 -0600 Subject: [PATCH 12/23] Adds task tests --- tests/test_tasks.py | 67 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 tests/test_tasks.py diff --git a/tests/test_tasks.py b/tests/test_tasks.py new file mode 100644 index 00000000..9475f8ab --- /dev/null +++ b/tests/test_tasks.py @@ -0,0 +1,67 @@ +import pytest +from prompting.tasks import Task, QuestionAnsweringTask, SummarizationTask, DebuggingTask, MathTask, DateQuestionAnsweringTask +from prompting.mock import MockPipeline + +""" +What we want to test for each task: +- The task is initialized correctly +- The task contains a query +- The task contains a reference answer +- Task contains a query_time +- Task contains a reference_time +- The task formats correctly +- All task fields are present as expected +- Tasks have reward definitions +""" + + +LLM_PIPELINE = MockPipeline("mock") +CONTEXT = {"text": "This is a context.", "title": "this is a title"} + +TASKS = [ + QuestionAnsweringTask, + SummarizationTask, + DebuggingTask, + MathTask, + DateQuestionAnsweringTask, + ] +CONTEXTS = { + QuestionAnsweringTask: {"text": "This is a context.", "title": "this is a title", "categories": ['some','categories']}, + SummarizationTask: {"text": "This is a context.", "title": "this is a title", "categories": ['some','categories']}, + DebuggingTask: {"code": "This is code","repo_name":'prompting',"path":'this/is/a/path', "language":'python'}, + MathTask: {"problem": "This is a problem","solution":'3.1415'}, + DateQuestionAnsweringTask: {"section": "Events", "event":'1066 - Battle of Hastings in UK', 'date':"1 January 2021"}, +} +# TODO: Math task only works when solution is floatable +# TODO: DateQA only accepts section in {Births, Deaths, Events} +# TODO: DateQA expect wiki entry for event + +@pytest.mark.parametrize('task', TASKS) +def test_create_task(task: Task): + context = CONTEXTS[task] + task(llm_pipeline=LLM_PIPELINE, context=context) + +@pytest.mark.parametrize('task', TASKS) +def test_task_contains_query(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + assert task.query is not None + +@pytest.mark.parametrize('task', TASKS) +def test_task_contains_reference(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + assert task.reference is not None + + +# @pytest.mark.parametrize('task', TASKS) +# def test_task_contains_query_time(task: Task): +# context = CONTEXTS[task] +# task = task(llm_pipeline=LLM_PIPELINE, context=context) +# assert task.reference_time>=0 + +# @pytest.mark.parametrize('task', TASKS) +# def test_task_contains_reference_time(task: Task): +# context = CONTEXTS[task] +# task = task(llm_pipeline=LLM_PIPELINE, context=context) +# assert task.query_time>=0 From ab222c39790dd8bfe4e199a1e057e78a2ee90262 Mon Sep 17 00:00:00 2001 From: Steffen Cruz Date: Thu, 18 Jan 2024 20:45:02 -0600 Subject: [PATCH 13/23] Change pip install flag to -e --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2f1bd463..7f86bd1f 100644 --- 
a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -28,7 +28,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest black - pip install -r requirements.txt + pip install -e . pip install -r prompting/requirements.txt - name: Lint with flake8 From 225747e72516acc84417cda4ea27b1d55ebc8e7b Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 03:59:00 +0000 Subject: [PATCH 14/23] Apply minmax scaling to rewards --- prompting/rewards/reward.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/prompting/rewards/reward.py b/prompting/rewards/reward.py index 3604ee30..c409b678 100644 --- a/prompting/rewards/reward.py +++ b/prompting/rewards/reward.py @@ -18,6 +18,7 @@ class RewardEvent: """Contains rewards for all the responses in a batch""" model_name: str rewards: torch.FloatTensor + rewards_normalized: torch.FloatTensor timings: torch.FloatTensor model_type: RewardModelTypeEnum batch_time: float @@ -27,6 +28,7 @@ class RewardEvent: def asdict(self) -> dict: return { f"{self.model_name}_raw_rewards": self.rewards.tolist(), + f"{self.model_name}_rewards": self.rewards_normalized.tolist(), f"{self.model_name}_timings": self.timings.tolist(), f"{self.model_name}_batch_time": self.batch_time, f"{self.model_name}_extra_info": self.extra_info, @@ -119,6 +121,12 @@ class BatchRewardOutput: rewards: torch.FloatTensor timings: torch.FloatTensor extra_info: dict + + def __post_init__(self): + if self.rewards.shape != self.timings.shape: + raise ValueError(f"rewards.shape {self.rewards.shape} != timings.shape {self.timings.shape}") + + self.rewards_normalized = (self.rewards-self.rewards.min())/(self.rewards.max()-self.rewards.min()) class BaseRewardModel(ABC): @@ -153,6 +161,7 @@ def apply(self, reference: str, response_event) -> RewardEvent: return RewardEvent( model_name=self.name, rewards=batch_rewards_output.rewards, + rewards_normalized=batch_rewards_output.rewards_normalized, model_type=self.model_type, batch_time=batch_rewards_time, extra_info=batch_rewards_output.extra_info, From 04e0a38ed9a2c1394f3669e1ef0f435c93e89675 Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 04:08:04 +0000 Subject: [PATCH 15/23] Fix logging and add try statement in math reward model --- prompting/forward.py | 2 -- prompting/rewards/float_diff.py | 23 +++++++++++++---------- prompting/utils/uids.py | 6 ++---- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/prompting/forward.py b/prompting/forward.py index 5a0664a6..2a9ba27d 100644 --- a/prompting/forward.py +++ b/prompting/forward.py @@ -95,8 +95,6 @@ async def run_step( **response_event.__state_dict__(), } - bt.logging.debug(f"Step complete. 
Event:\n{event}") - # BUG: Make sure everything we log is on CPU not GPU log_event(self, event) return event diff --git a/prompting/rewards/float_diff.py b/prompting/rewards/float_diff.py index 04de727b..cc0ed1c6 100644 --- a/prompting/rewards/float_diff.py +++ b/prompting/rewards/float_diff.py @@ -36,18 +36,21 @@ def math_score(reference, completion): if pred is None: return 0.0 - # Convert reference to float (this is okay because we already checked that the reference is a float) - # TODO: More flexible parsing of the reference (just as with the completion) - ref = float(reference) - if pred == ref: - return 1.0 + try: - # Compute the difference - diff = abs(ref - pred)/(ref + 1e-6) - # Make sure the difference is between 0 and 1 - diff = min(abs(diff), 1) + # Convert reference to float (this is okay because we already checked that the reference is a float) + # TODO: More flexible parsing of the reference (just as with the completion) + ref = float(reference) + if pred == ref: + return 1.0 + # Compute the difference + diff = abs(ref - pred)/(ref + 1e-6) + # Make sure the difference is between 0 and 1 + diff = min(abs(diff), 1) - return 1.0 - diff + return 1.0 - diff + except Exception: + return 0.0 def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput: diff --git a/prompting/utils/uids.py b/prompting/utils/uids.py index b729c1ac..574ddc31 100644 --- a/prompting/utils/uids.py +++ b/prompting/utils/uids.py @@ -20,10 +20,8 @@ def check_uid_availability( bt.logging.debug(f"uid: {uid} is not serving") return False # Filter validator permit > 1024 stake. - if metagraph.validator_permit[uid]: - bt.logging.debug(f"uid: {uid} has validator permit") - if metagraph.S[uid] > vpermit_tao_limit: - bt.logging.debug(f"uid: {uid} has stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") + if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit: + bt.logging.debug(f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") return False # Available otherwise. return True From ac2f2ead05cb8784c8c90533f0a316b43ff582d6 Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 04:10:33 +0000 Subject: [PATCH 16/23] Remove comment --- scripts/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/run.py b/scripts/run.py index 6366cff9..c0e348b8 100644 --- a/scripts/run.py +++ b/scripts/run.py @@ -1,6 +1,5 @@ import subprocess -# hi mom! coldkey = 'sn1-test' netuid = 61 network = 'test' From 5d085eb602ae8fcd77cda6bdc625a6cadae175fd Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 04:11:18 +0000 Subject: [PATCH 17/23] Fix indentation error --- prompting/utils/uids.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prompting/utils/uids.py b/prompting/utils/uids.py index 574ddc31..97ab103c 100644 --- a/prompting/utils/uids.py +++ b/prompting/utils/uids.py @@ -20,9 +20,9 @@ def check_uid_availability( bt.logging.debug(f"uid: {uid} is not serving") return False # Filter validator permit > 1024 stake. - if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit: - bt.logging.debug(f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") - return False + if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit: + bt.logging.debug(f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}") + return False # Available otherwise. 
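    # Note: exclusion requires BOTH a validator permit AND stake above
    # vpermit_tao_limit; permit holders below the cap remain queryable.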
return True From 7524531c004b0d2a0bde4e7552e73400069571d8 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Fri, 19 Jan 2024 16:48:24 +0000 Subject: [PATCH 18/23] Add tests --- tests/test_agent.py | 82 ++++++++++++++++++++++++++ tests/test_dataset.py | 27 +++++++++ tests/test_dataset_task_integration.py | 50 ++++++++++++++++ tests/test_persona.py | 14 +++++ tests/test_tasks.py | 20 ++++++- 5 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 tests/test_agent.py create mode 100644 tests/test_dataset.py create mode 100644 tests/test_dataset_task_integration.py create mode 100644 tests/test_persona.py diff --git a/tests/test_agent.py b/tests/test_agent.py new file mode 100644 index 00000000..da783a50 --- /dev/null +++ b/tests/test_agent.py @@ -0,0 +1,82 @@ +import pytest +from prompting.agent import Persona +from prompting.agent import HumanAgent +from prompting.tasks import Task, QuestionAnsweringTask, SummarizationTask, DebuggingTask, MathTask, DateQuestionAnsweringTask +from prompting.tools import MockDataset, CodingDataset, WikiDataset, StackOverflowDataset, DateQADataset, MathDataset +from prompting.mock import MockPipeline + +""" +Things to test: + - Agent is initialized correctly + - Agent contains a persona + - Agent contains a task + - Agent can make queries + - Agent can make responses + + - Persona is initialized correctly + - Persona contains a mood + - Persona contains a tone + - Persona contains a topic + - Persona contains a subject + - Persona contains a description + - Persona contains a goal + - Persona contains a query + + - Task is initialized correctly + - Task contains a query + - Task contains a reference + - Task contains a context + - Task contains a complete flag + + +""" +TASKS = [ + QuestionAnsweringTask, + SummarizationTask, + DebuggingTask, + MathTask, + DateQuestionAnsweringTask, + ] +LLM_PIPELINE = MockPipeline("mock") +CONTEXTS = { + QuestionAnsweringTask: WikiDataset().next(), + SummarizationTask: WikiDataset().next(), + DebuggingTask: CodingDataset().next(), + MathTask: MathDataset().next(), + DateQuestionAnsweringTask: DateQADataset().next(), +} + +@pytest.mark.parametrize('task', TASKS) +def test_agent_creation_with_dataset_context(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True) + assert agent is not None + +@pytest.mark.parametrize('task', TASKS) +def test_agent_contains_persona(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True) + assert agent.persona is not None + +@pytest.mark.parametrize('task', TASKS) +def test_agent_contains_task(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True) + assert agent.task is not None + +@pytest.mark.parametrize('task', TASKS) +def test_agent_can_make_queries(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task, begin_conversation=True) + assert agent.query is not None + +@pytest.mark.parametrize('task', TASKS) +def test_agent_can_make_challenges(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + agent = HumanAgent(llm_pipeline=LLM_PIPELINE, task=task) + assert 
agent.challenge is not None diff --git a/tests/test_dataset.py b/tests/test_dataset.py new file mode 100644 index 00000000..8e1802e2 --- /dev/null +++ b/tests/test_dataset.py @@ -0,0 +1,27 @@ +import pytest + +from prompting.tools import MockDataset, CodingDataset, WikiDataset, StackOverflowDataset, DateQADataset, MathDataset + + + + +DATASETS = [ + MockDataset, + CodingDataset, + WikiDataset, + StackOverflowDataset, + DateQADataset, + MathDataset, +] + + +@pytest.mark.parametrize('dataset', DATASETS) +def test_create_task(dataset): + data = dataset() + assert data is not None + + +@pytest.mark.parametrize('dataset', DATASETS) +def test_create_task(dataset): + data = dataset() + assert data.next() is not None \ No newline at end of file diff --git a/tests/test_dataset_task_integration.py b/tests/test_dataset_task_integration.py new file mode 100644 index 00000000..24f12726 --- /dev/null +++ b/tests/test_dataset_task_integration.py @@ -0,0 +1,50 @@ +import pytest +from prompting.tasks import Task, QuestionAnsweringTask, SummarizationTask, DebuggingTask, MathTask, DateQuestionAnsweringTask +from prompting.tools import MockDataset, CodingDataset, WikiDataset, StackOverflowDataset, DateQADataset, MathDataset +from prompting.mock import MockPipeline + + +""" +What we want: + +- The task is initialized correctly using dataset +- The task contains a query using dataset +- The task contains a reference answer using dataset +""" + + +TASKS = [ + QuestionAnsweringTask, + SummarizationTask, + DebuggingTask, + MathTask, + DateQuestionAnsweringTask, + ] +CONTEXTS = { + QuestionAnsweringTask: WikiDataset().next(), + SummarizationTask: WikiDataset().next(), + DebuggingTask: CodingDataset().next(), + MathTask: MathDataset().next(), + DateQuestionAnsweringTask: DateQADataset().next(), +} + +LLM_PIPELINE = MockPipeline("mock") + +@pytest.mark.parametrize('task', TASKS) +def test_task_creation_with_dataset_context(task: Task): + context = CONTEXTS[task] + task(llm_pipeline=LLM_PIPELINE, context=context) + assert task is not None + +@pytest.mark.parametrize('task', TASKS) +def test_task_contains_query(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + assert task.query is not None + +@pytest.mark.parametrize('task', TASKS) +def test_task_contains_reference(task: Task): + context = CONTEXTS[task] + task = task(llm_pipeline=LLM_PIPELINE, context=context) + assert task.reference is not None + diff --git a/tests/test_persona.py b/tests/test_persona.py new file mode 100644 index 00000000..4f3097a5 --- /dev/null +++ b/tests/test_persona.py @@ -0,0 +1,14 @@ +import pytest +from prompting.persona import Persona, create_persona + +def test_persona_initialization(): + assert create_persona() is not None + +def test_persona_contains_mood(): + assert create_persona().mood is not None + +def test_persona_contains_tone(): + assert create_persona().tone is not None + +def test_persona_contains_profile(): + assert create_persona().profile is not None \ No newline at end of file diff --git a/tests/test_tasks.py b/tests/test_tasks.py index 9475f8ab..1a5df0ea 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -30,8 +30,9 @@ SummarizationTask: {"text": "This is a context.", "title": "this is a title", "categories": ['some','categories']}, DebuggingTask: {"code": "This is code","repo_name":'prompting',"path":'this/is/a/path', "language":'python'}, MathTask: {"problem": "This is a problem","solution":'3.1415'}, - DateQuestionAnsweringTask: {"section": "Events", 
"event":'1066 - Battle of Hastings in UK', 'date':"1 January 2021"}, + DateQuestionAnsweringTask: {"section": "Events", "event":"1953 - Battle of Hastings in UK", 'date':"1 January"}, } + # TODO: Math task only works when solution is floatable # TODO: DateQA only accepts section in {Births, Deaths, Events} # TODO: DateQA expect wiki entry for event @@ -53,6 +54,23 @@ def test_task_contains_reference(task: Task): task = task(llm_pipeline=LLM_PIPELINE, context=context) assert task.reference is not None +# @pytest.mark.parametrize('task', TASKS) +# def test_task_contains_reward_definition(task: Task): +# context = CONTEXTS[task] +# task = task(llm_pipeline=LLM_PIPELINE, context=context) +# assert task.reward_definition is not None + +# @pytest.mark.parametrize('task', TASKS) +# def test_task_contains_goal(task: Task): +# context = CONTEXTS[task] +# task = task(llm_pipeline=LLM_PIPELINE, context=context) +# assert task.goal is not None + +# @pytest.mark.parametrize('task', TASKS) +# def test_task_contains_desc(task: Task): +# context = CONTEXTS[task] +# task = task(llm_pipeline=LLM_PIPELINE, context=context) +# assert task.desc is not None # @pytest.mark.parametrize('task', TASKS) # def test_task_contains_query_time(task: Task): From 7086c7b1282c12169263206050bf64641dc231aa Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 19:03:40 +0000 Subject: [PATCH 19/23] Package install, run only tests in tests/ and remove circleci --- .circleci/config.yml | 168 --------------------------- .github/workflows/python-package.yml | 4 +- tests/test_template_validator.py | 112 ------------------ 3 files changed, 2 insertions(+), 282 deletions(-) delete mode 100644 .circleci/config.yml delete mode 100644 tests/test_template_validator.py diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 0473afe6..00000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,168 +0,0 @@ -version: 2.1 - -orbs: - python: circleci/python@2.1.1 - python-lib: dialogue/python-lib@0.1.55 - # coveralls: coveralls/coveralls@1.0.6 - -jobs: - black: - resource_class: small - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - restore_cache: - name: Restore cached black venv - keys: - - v1-pypi-py-black-<< parameters.python-version >> - - - run: - name: Update & Activate black venv - command: | - python -m venv env/ - . env/bin/activate - python -m pip install --upgrade pip - pip install black - - - save_cache: - name: Save cached black venv - paths: - - "env/" - key: v1-pypi-py-black-<< parameters.python-version >> - - - run: - name: Black format check - command: | - . env/bin/activate - black --line-length 79 --exclude '(env|venv|.eggs)' --check . - - pylint: - resource_class: small - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - run: - name: Install Pylint - command: | - python -m venv env/ - . env/bin/activate - pip install pylint - - - run: - name: Pylint check - command: | - . 
env/bin/activate - pylint --fail-on=W,E,F --exit-zero ./ - - check_compatibility: - parameters: - python_version: - type: string - docker: - - image: cimg/python:3.10 - steps: - - checkout - - run: - name: Check if requirements files have changed - command: ./scripts/check_requirements_changes.sh - - run: - name: Install dependencies and Check compatibility - command: | - if [ "$REQUIREMENTS_CHANGED" == "true" ]; then - sudo apt-get update - sudo apt-get install -y jq curl - ./scripts/check_compatibility.sh << parameters.python_version >> - else - echo "Skipping compatibility checks..." - fi - - build: - resource_class: medium - parallelism: 2 - parameters: - python-version: - type: string - docker: - - image: cimg/python:<< parameters.python-version >> - - steps: - - checkout - - - restore_cache: - name: Restore cached venv - keys: - - v1-pypi-py<< parameters.python-version >>-{{ checksum "requirements.txt" }} - - v1-pypi-py<< parameters.python-version >> - - - run: - name: Update & Activate venv - command: | - python -m venv env/ - . env/bin/activate - python -m pip install --upgrade pip - - - save_cache: - name: Save cached venv - paths: - - "env/" - key: v1-pypi-py<< parameters.python-version >>-{{ checksum "requirements.txt" }} - - - run: - name: Install Bittensor Subnet Template - command: | - . env/bin/activate - pip install -e . - - - store_test_results: - path: test-results - - store_artifacts: - path: test-results - - coveralls: - docker: - - image: cimg/python:3.10 - steps: - - run: - name: Combine Coverage - command: | - pip3 install --upgrade coveralls - coveralls --finish --rcfile .coveragerc || echo "Failed to upload coverage" - -workflows: - compatibility_checks: - jobs: - - check_compatibility: - python_version: "3.8" - name: check-compatibility-3.8 - - check_compatibility: - python_version: "3.9" - name: check-compatibility-3.9 - - check_compatibility: - python_version: "3.10" - name: check-compatibility-3.10 - - check_compatibility: - python_version: "3.11" - name: check-compatibility-3.11 - - pr-requirements: - jobs: - - black: - python-version: "3.8.12" - - pylint: - python-version: "3.8.12" - - build: - matrix: - parameters: - python-version: ["3.9.13", "3.10.6", "3.11.4"] diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index b0a00f12..2ff1211e 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -28,7 +28,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest black - pip install -r requirements.txt + pip install -e . pip install -r prompting/requirements.txt - name: Lint with flake8 @@ -42,4 +42,4 @@ jobs: black . 
- name: Test with pytest run: | - pytest + pytest tests/ diff --git a/tests/test_template_validator.py b/tests/test_template_validator.py deleted file mode 100644 index 2ce7ac2c..00000000 --- a/tests/test_template_validator.py +++ /dev/null @@ -1,112 +0,0 @@ -# The MIT License (MIT) -# Copyright © 2023 Yuma Rao -# Copyright © 2023 Opentensor Foundation - -# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -# documentation files (the “Software”), to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all copies or substantial portions of -# the Software. - -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import sys -import torch -import unittest -import bittensor as bt - -from neurons.validator import Neuron as Validator -from neurons.miner import Neuron as Miner - -from prompting.protocol import Dummy -from prompting.validator.forward import forward -from prompting.utils.uids import get_random_uids -from prompting.validator.reward import get_rewards -from prompting.base.validator import BaseValidatorNeuron - - -class TemplateValidatorNeuronTestCase(unittest.TestCase): - """ - This class contains unit tests for the RewardEvent classes. - - The tests cover different scenarios where completions may or may not be successful and the reward events are checked that they don't contain missing values. - The `reward` attribute of all RewardEvents is expected to be a float, and the `is_filter_model` attribute is expected to be a boolean. - """ - - def setUp(self): - sys.argv = sys.argv[0] + ["--config", "tests/configs/validator.json"] - - config = BaseValidatorNeuron.config() - config.wallet._mock = True - config.metagraph._mock = True - config.subtensor._mock = True - self.neuron = Validator(config) - self.miner_uids = get_random_uids(self, k=10) - - def test_run_single_step(self): - # TODO: Test a single step - pass - - def test_sync_error_if_not_registered(self): - # TODO: Test that the validator throws an error if it is not registered on metagraph - pass - - def test_forward(self): - # TODO: Test that the forward function returns the correct value - pass - - def test_dummy_responses(self): - # TODO: Test that the dummy responses are correctly constructed - - responses = self.neuron.dendrite.query( - # Send the query to miners in the network. - axons=[self.neuron.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. 
- deserialize=True, - ) - - for i, response in enumerate(responses): - self.assertEqual(response, self.neuron.step * 2) - - def test_reward(self): - # TODO: Test that the reward function returns the correct value - responses = self.dendrite.query( - # Send the query to miners in the network. - axons=[self.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. - deserialize=True, - ) - - rewards = get_rewards(self.neuron, responses) - expected_rewards = torch.FloatTensor([1.0] * len(responses)) - self.assertEqual(rewards, expected_rewards) - - def test_reward_with_nan(self): - # TODO: Test that NaN rewards are correctly sanitized - # TODO: Test that a bt.logging.warning is thrown when a NaN reward is sanitized - responses = self.dendrite.query( - # Send the query to miners in the network. - axons=[self.metagraph.axons[uid] for uid in self.miner_uids], - # Construct a dummy query. - synapse=Dummy(dummy_input=self.neuron.step), - # All responses have the deserialize function called on them before returning. - deserialize=True, - ) - - rewards = get_rewards(self.neuron, responses) - expected_rewards = rewards.clone() - # Add NaN values to rewards - rewards[0] = float("nan") - - with self.assertLogs(bt.logging, level="WARNING") as cm: - self.neuron.update_scores(rewards, self.miner_uids) From e74dd581920111bde992afb616f5df080cc7874d Mon Sep 17 00:00:00 2001 From: steffencruz Date: Fri, 19 Jan 2024 19:10:18 +0000 Subject: [PATCH 20/23] Add custom test conditions --- .github/workflows/python-package.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2ff1211e..36058317 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -42,4 +42,5 @@ jobs: black . 
- name: Test with pytest run: | - pytest tests/ + # run tests in tests/ dir and only fail if there are failures or errors + pytest tests/ --verbose --failed-first --exitfirst --disable-warnings \ No newline at end of file From a9c945e9f1cd803566b01f0978033217bc823cf7 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Fri, 19 Jan 2024 19:18:42 +0000 Subject: [PATCH 21/23] Remove undeployed tasks --- tests/test_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_agent.py b/tests/test_agent.py index da783a50..192657e2 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -33,8 +33,8 @@ TASKS = [ QuestionAnsweringTask, SummarizationTask, - DebuggingTask, - MathTask, + #DebuggingTask, + #MathTask, DateQuestionAnsweringTask, ] LLM_PIPELINE = MockPipeline("mock") From f3d2c848176651c36db9fa15ff43cdfca378664f Mon Sep 17 00:00:00 2001 From: p-ferreira <38992619+p-ferreira@users.noreply.github.com> Date: Fri, 19 Jan 2024 14:35:31 -0500 Subject: [PATCH 22/23] updates default wandb of validators --- prompting/utils/config.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/prompting/utils/config.py b/prompting/utils/config.py index df51b873..ae9fba48 100644 --- a/prompting/utils/config.py +++ b/prompting/utils/config.py @@ -287,7 +287,14 @@ def add_validator_args(cls, parser): "--wandb.project_name", type=str, help="The name of the project where you are sending the new run.", - default="synapse_agent_experiments", + default="alpha-validators", + ) + + parser.add_argument( + "--wandb.entity", + type=str, + help="The name of the project where you are sending the new run.", + default="opentensor-dev", ) From e20e03bb61c01474b80ade29680e2af7b9aa8b21 Mon Sep 17 00:00:00 2001 From: bkb2135 Date: Fri, 19 Jan 2024 19:42:39 +0000 Subject: [PATCH 23/23] Remove math Task from pytest --- tests/test_dataset_task_integration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_dataset_task_integration.py b/tests/test_dataset_task_integration.py index 24f12726..7df3c569 100644 --- a/tests/test_dataset_task_integration.py +++ b/tests/test_dataset_task_integration.py @@ -16,8 +16,8 @@ TASKS = [ QuestionAnsweringTask, SummarizationTask, - DebuggingTask, - MathTask, + #DebuggingTask, + #MathTask, DateQuestionAnsweringTask, ] CONTEXTS = {
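
For reference, the min-max scaling introduced in PATCH 14 can be exercised in isolation. The sketch below reproduces the `__post_init__` logic using the field names from the diff; the `extra_info` field and the surrounding `RewardEvent` plumbing are omitted, and the example values are invented:

```python
import torch
from dataclasses import dataclass


@dataclass
class BatchRewardOutput:
    """Minimal stand-in for prompting.rewards.reward.BatchRewardOutput."""

    rewards: torch.FloatTensor
    timings: torch.FloatTensor

    def __post_init__(self):
        if self.rewards.shape != self.timings.shape:
            raise ValueError(
                f"rewards.shape {self.rewards.shape} != timings.shape {self.timings.shape}"
            )
        # Min-max scale the batch into [0, 1]. As written in the patch, this
        # divides by zero when all rewards in a batch are identical (max == min).
        self.rewards_normalized = (self.rewards - self.rewards.min()) / (
            self.rewards.max() - self.rewards.min()
        )


output = BatchRewardOutput(
    rewards=torch.tensor([0.2, 0.5, 0.9]),
    timings=torch.tensor([0.01, 0.02, 0.01]),
)
print(output.rewards_normalized)  # tensor([0.0000, 0.4286, 1.0000])
```

Because normalization is per batch, a completion's normalized reward reflects its rank relative to the other miners queried in the same step rather than an absolute quality score.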