Skip to content

Commit

Permalink
Merge pull request #122 from tensorplex-labs/dev
Browse files Browse the repository at this point in the history
chore(release): v1.6.2
  • Loading branch information
codebender37 authored Feb 13, 2025
2 parents e8bfcf8 + deac196 commit c8b50e5
Show file tree
Hide file tree
Showing 22 changed files with 959 additions and 544 deletions.
10 changes: 6 additions & 4 deletions .env.validator.example
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ SUBTENSOR_ENDPOINT=wss://entrypoint-finney.opentensor.ai:443
# NETUID=98
# SUBTENSOR_NETWORK=test
# SUBTENSOR_ENDPOINT=ws://testnet-lite:9944
# WANDB_PROJECT_NAME=dojo-testnet

WANDB_API_KEY=
WANDB_PROJECT_NAME=dojo-mainnet

# for dojo-synthetic-api
OPENROUTER_API_KEY=
Expand All @@ -43,3 +39,9 @@ DB_NAME=db
DB_USERNAME=
DB_PASSWORD=
DATABASE_URL=postgresql://${DB_USERNAME}:${DB_PASSWORD}@${DB_HOST}/${DB_NAME}


#dojo loki
DOJO_LOKI_URL=<GET_FROM_TPLX_TEAM>
# hotkey for loki external label
VALIDATOR_HOTKEY=
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ dojo-cli:
extract-dataset:
docker compose -f docker-compose.validator.yaml run --rm --remove-orphans extract-dataset

fill-score-column:
docker compose -f docker-compose.validator.yaml run --rm --remove-orphans fill-score-column

migration:
docker compose --env-file .env.validator -f docker-compose.validator.yaml run --rm migration

Expand Down
22 changes: 19 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
- [Auto-updater](#auto-updater)
- [Dojo CLI](#dojo-cli)
- [For Dojo developers](#for-dojo-developers)
- [Dataset Extraction](#dataset-extraction)
- [License](#license)

</details>
Expand Down Expand Up @@ -102,7 +103,6 @@ By creating an open platform for gathering human-generated datasets, Tensorplex
- docker
- GNU make
- openrouter api key
- wandb api key

### System Requirements

Expand Down Expand Up @@ -203,6 +203,9 @@ For Docker Compose installation, see https://docs.docker.com/compose/install/lin
# verify both docker and docker compose are installed
docker --version
docker compose version

# validators: install the Docker Loki logging plugin
docker plugin install grafana/loki-docker-driver:3.3.2-amd64 --alias loki --grant-all-permissions
```

4. Start local subtensor node (**optional**)
Expand Down Expand Up @@ -420,8 +423,6 @@ WALLET_COLDKEY=# the name of the coldkey
WALLET_HOTKEY=# the name of the hotkey
DATASET_SERVICE_BASE_URL=https://dojo-validator-api.tensorplex.ai

# head to https://wandb.ai/authorize to get your API key
WANDB_API_KEY="<wandb_key>"

# for dojo-synthetic-api
OPENROUTER_API_KEY="sk-or-v1-<KEY>"
Expand All @@ -438,6 +439,10 @@ DB_NAME=db
DB_USERNAME=#set a non-default username
DB_PASSWORD=#generate and set a secure password
DATABASE_URL=postgresql://${DB_USERNAME}:${DB_PASSWORD}@${DB_HOST}/${DB_NAME}

# dojo loki
DOJO_LOKI_URL=# get from TPLX TEAM
VALIDATOR_HOTKEY=# your running validator hotkey address
```

> **Note:** To ensure your validator runs smoothly, enable the auto top-up feature for Openrouter; this ensures that your validator will not fail to call the synthetic API during task generation. The estimated cost of generating a task is approximately $0.20 USD.
Expand Down Expand Up @@ -571,6 +576,17 @@ make install-dev
make install-test
```

## Dataset Extraction

The dataset is exported in multiple parts: `MAX_CHUNK_SIZE_MB` is currently set to 50MB on the dataset service, due to limitations on the load balancer. Use the following commands to combine all parts into a single dataset file:

```bash
aws s3 cp s3://amzn-s3-demo-bucket1/ <PATH_ON_LOCAL> --recursive --exclude "*" --include "hotkey_<vali_hotkey>_dataset_20250212*.jsonl"
cd <PATH_ON_LOCAL>
# to merge all chunks into a single dataset file
cat *.jsonl > hotkey_<vali_hotkey>_dataset_combined.jsonl
```

# License

This repository is licensed under the MIT License.
Expand Down
77 changes: 0 additions & 77 deletions commons/logging/wandb.py

This file was deleted.

11 changes: 6 additions & 5 deletions commons/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ async def update_miner_task_results(
)
else:
logger.warning(
f"Retrying update, attempt {attempt+2}/{max_retries}"
f"Retrying update, attempt {attempt + 2}/{max_retries}"
)
await asyncio.sleep(2**attempt)

Expand All @@ -282,7 +282,7 @@ async def update_miner_task_results(
logger.error(f"Error updating task results: {e}")
else:
logger.warning(
f"Error during attempt {attempt+1}, retrying: {e}"
f"Error during attempt {attempt + 1}, retrying: {e}"
)
await asyncio.sleep(2**attempt)

Expand All @@ -300,6 +300,7 @@ async def update_miner_raw_scores(
max_retries: int = 20,
) -> tuple[bool, list[int]]:
"""Update the miner's provided raw scores for a list of miner responses.
NOTE: this is to be used when the task is first saved to validator's database.
Args:
miner_responses: List of TaskSynapseObject containing miner responses
Expand Down Expand Up @@ -403,18 +404,18 @@ async def update_miner_raw_scores(
)

logger.debug(
f"Updating completion responses: updated batch {batch_id+1}/{num_batches}"
f"Updating completion responses: updated batch {batch_id + 1}/{num_batches}"
)
break
except Exception as e:
if attempt == max_retries - 1:
logger.error(
f"Failed to update batch {batch_id+1}/{num_batches} after {max_retries} attempts: {e}"
f"Failed to update batch {batch_id + 1}/{num_batches} after {max_retries} attempts: {e}"
)
failed_batch_indices.extend(range(start_idx, end_idx))
else:
logger.warning(
f"Retrying batch {batch_id+1}/{num_batches}, attempt {attempt+2}/{max_retries}"
f"Retrying batch {batch_id + 1}/{num_batches}, attempt {attempt + 2}/{max_retries}"
)
await asyncio.sleep(2**attempt)

Expand Down
9 changes: 9 additions & 0 deletions database/mappers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json

import bittensor as bt
from pydantic import BaseModel

from commons.utils import datetime_to_iso8601_str, iso8601_str_to_datetime
from database.prisma import Json
Expand All @@ -13,6 +14,7 @@
MinerResponseCreateInput,
ValidatorTaskCreateInput,
)
from dojo import get_commit_hash, get_latest_git_tag
from dojo.protocol import (
CompletionResponse,
CriteriaType,
Expand All @@ -21,6 +23,11 @@
)


class Metadata(BaseModel):
git_tag: str
commit_hash: str


# ---------------------------------------------------------------------------- #
# MAP PROTOCOL OBJECTS TO DATABASE MODEL INPUTS #
# ---------------------------------------------------------------------------- #
Expand Down Expand Up @@ -49,6 +56,7 @@ def map_task_synapse_object_to_validator_task(
if synapse.ground_truth
else []
)
metadata = Metadata(git_tag=get_latest_git_tag(), commit_hash=get_commit_hash())

return ValidatorTaskCreateInput(
id=synapse.task_id,
Expand All @@ -59,6 +67,7 @@ def map_task_synapse_object_to_validator_task(
is_processed=False,
miner_responses={"create": []},
ground_truth={"create": ground_truths},
metadata=Json(json.dumps(metadata.model_dump())),
)


Expand Down
23 changes: 22 additions & 1 deletion docker-compose.validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ services:
logging: *default-logging

synthetic-api:
container_name: synthetic-api
image: ghcr.io/tensorplex-labs/dojo-synthetic-api:main
env_file:
- .env.validator
Expand Down Expand Up @@ -134,7 +135,12 @@ services:
condition: service_healthy
prisma-setup-vali:
condition: service_completed_successfully
logging: *default-logging
logging:
driver: loki
options:
mode: non-blocking
loki-url: "https://${DOJO_LOKI_URL}"
loki-external-labels: "validator=${VALIDATOR_HOTKEY}"

dataset-service:
container_name: dataset-service
Expand Down Expand Up @@ -196,3 +202,18 @@ services:
- prisma-pip-cache:/root/.cache/pip
- $HOME/.bittensor:/root/.bittensor
logging: *default-logging

fill-score-column:
container_name: fill-score-column
image: ghcr.io/tensorplex-labs/dojo:main
env_file:
- .env.validator
command: ["fill-score-column"]
networks:
- dojo-validator
volumes:
- ./:/app
- ./.env.validator:/app/.env
- prisma-binary:/root/prisma-python
- $HOME/.bittensor:/root/.bittensor
logging: *default-logging
7 changes: 7 additions & 0 deletions docker/Dockerfile.dataset
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ FROM python:3.11-slim-bookworm

WORKDIR /app

# Prisma-specific environment variables
ENV PRISMA_USE_NODEJS_BIN=true
ENV PRISMA_BINARY_PLATFORM=debian-openssl-3.0.x
ENV PRISMA_BINARY_CACHE_DIR=/root/prisma-python

ENV PATH="/root/.cargo/bin/:$PATH"
ENV UV_SYSTEM_PYTHON=true
ENV NVM_DIR=/root/.nvm
Expand All @@ -15,6 +20,8 @@ RUN apt-get update \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

RUN mkdir -p /root/prisma-python

COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
COPY . .

Expand Down
14 changes: 14 additions & 0 deletions dojo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,20 @@ def get_latest_git_tag():
raise RuntimeError("Failed to get latest Git tag")


def get_commit_hash() -> str:
    """Return the full SHA of the current git HEAD.

    Returns:
        str: the commit hash reported by ``git rev-parse HEAD``,
        with surrounding whitespace stripped.

    Raises:
        RuntimeError: if the ``git`` invocation fails (e.g. not run inside
            a git repository); chained from the underlying
            ``subprocess.CalledProcessError``.
    """
    try:
        # text=True decodes the output for us; strip the trailing newline.
        return subprocess.check_output(
            ["git", "rev-parse", "HEAD"], text=True
        ).strip()
    except subprocess.CalledProcessError as e:
        print(f"Error getting the latest Git commit hash: {e}")
        # Chain the original failure so callers see the real cause.
        raise RuntimeError("Failed to get latest Git commit hash") from e


# Define the version of the template module.
__version__ = get_latest_git_tag()
version_split = __version__.split(".")
Expand Down
8 changes: 7 additions & 1 deletion dojo/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ def __init__(self, netuid, n=16, wallet=None, network="mock"):
self.create_subnet(netuid)


class MockTerminalInfo(bt.TerminalInfo):
def __init__(self, hotkey):
super().__init__()
self.hotkey = hotkey


class MockMetagraph(bt.metagraph):
def __init__(self, netuid=1, network="mock", subtensor=None):
super().__init__(netuid=netuid, network=network, sync=False)
Expand Down Expand Up @@ -50,7 +56,7 @@ def hotkeys(self, value):
# self.total_stake = np.array(stakes, dtype=np.float32)


class MockDendrite(bt.dendrite):
class MockDendrite(bt.Dendrite):
"""
Replaces a real bittensor network request with a mock request that just returns some static response for all axons that are passed and adds some random delay.
"""
Expand Down
8 changes: 0 additions & 8 deletions dojo/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,14 +218,6 @@ def add_args(parser):
default=0.3,
)

wandb_project_names = ["dojo-devnet", "dojo-testnet", "dojo-mainnet"]
parser.add_argument(
"--wandb.project_name",
type=str,
choices=wandb_project_names,
help="Name of the wandb project to use.",
)

elif neuron_type == "miner":
pass

Expand Down
Loading

0 comments on commit c8b50e5

Please sign in to comment.