Skip to content

Commit

Permalink
chore: remove wandb (#123)
Browse files Browse the repository at this point in the history
  • Loading branch information
jarvis8x7b authored Feb 12, 2025
1 parent 8636eb4 commit 78e23dd
Show file tree
Hide file tree
Showing 8 changed files with 7 additions and 186 deletions.
4 changes: 0 additions & 4 deletions .env.validator.example
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ SUBTENSOR_ENDPOINT=wss://entrypoint-finney.opentensor.ai:443
# NETUID=98
# SUBTENSOR_NETWORK=test
# SUBTENSOR_ENDPOINT=ws://testnet-lite:9944
# WANDB_PROJECT_NAME=dojo-testnet

WANDB_API_KEY=
WANDB_PROJECT_NAME=dojo-mainnet

# for dojo-synthetic-api
OPENROUTER_API_KEY=
Expand Down
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ By creating an open platform for gathering human-generated datasets, Tensorplex
- docker
- GNU make
- openrouter api key
- wandb api key

### System Requirements

Expand Down Expand Up @@ -423,8 +422,6 @@ WALLET_COLDKEY=# the name of the coldkey
WALLET_HOTKEY=# the name of the hotkey
DATASET_SERVICE_BASE_URL=https://dojo-validator-api.tensorplex.ai

# head to https://wandb.ai/authorize to get your API key
WANDB_API_KEY="<wandb_key>"

# for dojo-synthetic-api
OPENROUTER_API_KEY="sk-or-v1-<KEY>"
Expand Down
77 changes: 0 additions & 77 deletions commons/logging/wandb.py

This file was deleted.

8 changes: 0 additions & 8 deletions dojo/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,14 +218,6 @@ def add_args(parser):
default=0.3,
)

wandb_project_names = ["dojo-devnet", "dojo-testnet", "dojo-mainnet"]
parser.add_argument(
"--wandb.project_name",
type=str,
choices=wandb_project_names,
help="Name of the wandb project to use.",
)

elif neuron_type == "miner":
pass

Expand Down
2 changes: 0 additions & 2 deletions entrypoints.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ if [ "$1" = 'validator' ]; then
echo "SUBTENSOR_NETWORK: ${SUBTENSOR_NETWORK}"
echo "SUBTENSOR_ENDPOINT: ${SUBTENSOR_ENDPOINT}"
echo "NETUID: ${NETUID}"
echo "WANDB_PROJECT_NAME: ${WANDB_PROJECT_NAME}"

EXTRA_ARGS=""
if [ "${SIMULATION}" = "true" ]; then
Expand All @@ -71,7 +70,6 @@ if [ "$1" = 'validator' ]; then
--wallet.name ${WALLET_COLDKEY} \
--wallet.hotkey ${WALLET_HOTKEY} \
--neuron.type validator \
--wandb.project_name ${WANDB_PROJECT_NAME} \
${EXTRA_ARGS}
fi

Expand Down
2 changes: 0 additions & 2 deletions main_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from contextlib import asynccontextmanager

import uvicorn
import wandb
from bittensor.utils.btlogging import logging as logger
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
Expand All @@ -27,7 +26,6 @@ async def lifespan(app: FastAPI):
validator._should_exit = True
validator.executor.shutdown(wait=True)
validator.subtensor.substrate.close()
wandb.finish()
await validator.save_state()
await SyntheticAPI.close_session()
await disconnect_db()
Expand Down
94 changes: 6 additions & 88 deletions neurons/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,8 @@
import bittensor as bt
import numpy as np
import torch
import wandb
from bittensor.utils.btlogging import logging as logger
from bittensor.utils.weight_utils import process_weights_for_netuid
from fastapi.encoders import jsonable_encoder
from tenacity import RetryError
from torch.nn import functional as F
from websocket import create_connection
Expand All @@ -30,7 +28,6 @@
NoNewExpiredTasksYet,
SetWeightsFailed,
)
from commons.logging.wandb import init_wandb
from commons.obfuscation.obfuscation_utils import obfuscate_html_and_js
from commons.objects import ObjectManager
from commons.orm import ORM
Expand Down Expand Up @@ -116,8 +113,6 @@ def __init__(self):
)
self.check_registered()

init_wandb(config=self.config, my_uid=self.uid, wallet=self.wallet)

# Run score migration before loading state
migration_success = self.loop.run_until_complete(ScoreStorage.migrate_from_db())
if not migration_success:
Expand Down Expand Up @@ -277,7 +272,7 @@ async def _set_weights(self, uids: torch.Tensor, weights: torch.Tensor):
while attempt < max_attempts and not result:
try:
logger.debug(
f"Set weights attempt {attempt+1}/{max_attempts} at block: {self.block},time: {time.time()}"
f"Set weights attempt {attempt + 1}/{max_attempts} at block: {self.block},time: {time.time()}"
)

# Disable this for now to check validator hanging issue
Expand Down Expand Up @@ -306,7 +301,7 @@ async def _set_weights(self, uids: torch.Tensor, weights: torch.Tensor):

except Exception:
logger.warning(
f"Failed to set weights with attempt {attempt+1}/{max_attempts} due to: {message}"
f"Failed to set weights with attempt {attempt + 1}/{max_attempts} due to: {message}"
)

if attempt == max_attempts:
Expand Down Expand Up @@ -1054,7 +1049,7 @@ async def _send_shuffled_requests(
all_responses.extend(flat_batch_responses)

logger.info(
f"Processed batch {i//batch_size + 1} of {(len(axons)-1)//batch_size + 1}"
f"Processed batch {i // batch_size + 1} of {(len(axons) - 1) // batch_size + 1}"
)

return all_responses
Expand Down Expand Up @@ -1116,7 +1111,7 @@ async def _update_task_results(
for i in range(0, len(task.miner_responses), batch_size):
batch = task.miner_responses[i : i + batch_size]

logger.debug(f"Processing batch {i//batch_size + 1} of {num_batches}")
logger.debug(f"Processing batch {i // batch_size + 1} of {num_batches}")

tasks = [
self._update_miner_response(miner_response, obfuscated_to_real_model_id)
Expand Down Expand Up @@ -1313,7 +1308,7 @@ async def _update_miner_raw_scores_batch(
)
else:
logger.warning(
f"Retrying {len(failed_indices)} failed updates, attempt {attempt+2}/{max_retries}"
f"Retrying {len(failed_indices)} failed updates, attempt {attempt + 2}/{max_retries}"
)
remaining_responses = [
remaining_responses[i] for i in failed_indices
Expand All @@ -1326,7 +1321,7 @@ async def _update_miner_raw_scores_batch(
f"Error updating miner completions batch after {max_retries} attempts: {e}"
)
else:
logger.warning(f"Error during attempt {attempt+1}, retrying: {e}")
logger.warning(f"Error during attempt {attempt + 1}, retrying: {e}")
await asyncio.sleep(2**attempt)

async def _score_task(
Expand Down Expand Up @@ -1401,85 +1396,8 @@ async def _score_task(
hotkeys=list(hotkey_to_completion_responses.keys()),
)

# TODO: Remove wandb logging and save to db instead
# criteria_to_miner_score = {}
# asyncio.create_task(
# self._log_wandb(task, criteria_to_miner_score, updated_hotkey_to_scores)
# )

return task.validator_task.task_id, hotkey_to_scores

async def _log_wandb(
    self,
    task: DendriteQueryResponse,
    criteria_to_miner_score: dict,
    hotkey_to_score: dict,
):
    """Publish a summary of one scored task to wandb.

    Args:
        task: The query/response bundle; its ``validator_task`` and
            ``miner_responses`` attributes are read to build the payload.
        criteria_to_miner_score: Mapping whose values expose
            ``consensus.score`` and ``ground_truth``; both are stacked with
            ``torch.stack`` and averaged over dim 0.
        hotkey_to_score: Per-hotkey scores, logged as-is inside a
            one-element list.

    Returns:
        None. When either mapping is empty a warning is logged and nothing
        is sent to wandb.
    """
    # Nothing to average or report if either mapping is empty.
    if not (criteria_to_miner_score.values() and hotkey_to_score):
        logger.warning(
            "📝 No criteria to miner scores available. Skipping calculating averages for wandb."
        )
        return

    # Average the per-criterion tensors element-wise (dim 0 is the criterion axis
    # introduced by torch.stack).
    consensus_stack = torch.stack(
        [entry.consensus.score for entry in criteria_to_miner_score.values()]
    )
    mean_weighted_consensus_scores = consensus_stack.mean(dim=0).tolist()

    ground_truth_stack = torch.stack(
        [entry.ground_truth for entry in criteria_to_miner_score.values()]
    )
    mean_weighted_gt_scores = ground_truth_stack.mean(dim=0).tolist()

    logger.info(
        f"📝 Mean miner scores across different criteria: consensus shape:{mean_weighted_consensus_scores}, gt shape:{mean_weighted_gt_scores}"
    )

    dojo_scores_and_gt = await self._get_dojo_task_scores_and_gt(
        task.miner_responses
    )
    score_data = {
        "scores_by_hotkey": [hotkey_to_score],
        "mean": {
            "consensus": mean_weighted_consensus_scores,
            "ground_truth": mean_weighted_gt_scores,
        },
        "hotkey_to_dojo_task_scores_and_gt": dojo_scores_and_gt,
    }

    validator_task = task.validator_task
    completions = validator_task.completion_responses

    # Criteria are taken from the first completion only; fall back to an
    # empty list when there are no completions.
    if completions:
        criteria = completions[0].criteria_types
    else:
        criteria = []

    payload = {
        "request_id": validator_task.task_id,
        "task": validator_task.task_type,
        "criteria": criteria,
        "prompt": validator_task.prompt,
        "completions": jsonable_encoder(completions),
        "num_completions": len(completions or []),
        "scores": score_data,
        "num_responses": len(task.miner_responses),
    }
    wandb_data = jsonable_encoder(payload)

    wandb.log(wandb_data, commit=True)

async def _get_dojo_task_scores_and_gt(
self, miner_responses: List[TaskSynapseObject]
):
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ dependencies = [
"torch==2.3.1+cpu; sys_platform == 'linux'",
"torch==2.3.1; sys_platform == 'darwin'",
"uvicorn==0.22.0",
"wandb==0.17.4",
"redis==5.0.7",
"prisma==0.15.0",
"beautifulsoup4==4.12.3",
Expand Down Expand Up @@ -162,7 +161,7 @@ unfixable = []

[tool.ruff.lint.isort]
# packages listed here are explicitly treated by isort as third-party
known-third-party = ["wandb"]
known-third-party = []

[tool.setuptools]
packages = {find = {}}
Expand Down

0 comments on commit 78e23dd

Please sign in to comment.