Commit
feat: add validator api dataset service, extract dataset script
build: add deps for dataset service
build: add docker compose services & entrypoints
1 parent 17b0068
commit b6eb648
Showing 8 changed files with 657 additions and 9 deletions.
@@ -0,0 +1,35 @@
FROM python:3.11-slim-bookworm

WORKDIR /app

ENV PATH="/root/.cargo/bin/:$PATH"
ENV UV_SYSTEM_PYTHON=true
ENV NVM_DIR=/root/.nvm
ENV NODE_VERSION=v20.11.1
ENV NODE_PATH=$NVM_DIR/versions/node/$NODE_VERSION/lib/node_modules
ENV PATH=$NVM_DIR/versions/node/$NODE_VERSION/bin:$PATH

RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential curl git ca-certificates \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
COPY . .

ARG TARGETPLATFORM

RUN echo "Building for TARGETPLATFORM: $TARGETPLATFORM"

RUN git config --global --add safe.directory /app

# jank because pytorch has different cpu versions for darwin vs linux, see pyproject.toml for specifics
# RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
#         uv pip install --no-cache -e .[dataset] --find-links https://download.pytorch.org/whl/torch_stable.html; \
#     else \
#         uv pip install --no-cache -e .[dataset]; \
#     fi
RUN uv pip install --no-cache -e ".[dataset]" --find-links https://download.pytorch.org/whl/torch_stable.html

ENTRYPOINT ["./entrypoints.sh"]
@@ -0,0 +1,156 @@
import asyncio
import os
from typing import List

import aioboto3
import aiofiles
import bittensor as bt
import httpx
import uvicorn
from bittensor.btlogging import logging as logger
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from substrateinterface import Keypair

from commons.objects import ObjectManager
from dojo import VALIDATOR_MIN_STAKE

app = FastAPI(title="Dataset Upload Service")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
config = ObjectManager.get_config()
subtensor = bt.subtensor(config=config)
metagraph = subtensor.metagraph(netuid=52, lite=True)
AWS_REGION = os.getenv("AWS_REGION")
BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
MAX_CHUNK_SIZE_MB = int(os.getenv("MAX_CHUNK_SIZE_MB", 50))


def verify_signature(hotkey: str, signature: str, message: str) -> bool:
    keypair = Keypair(ss58_address=hotkey, ss58_format=42)
    if not keypair.verify(data=message, signature=signature):
        logger.error(f"Invalid signature for address={hotkey}")
        return False

    logger.success(f"Signature verified, signed by {hotkey}")
    return True


def check_stake(hotkey: str) -> bool:
    try:
        uid = metagraph.hotkeys.index(hotkey)
    except ValueError:
        logger.error(f"Hotkey {hotkey} not found in metagraph")
        return False

    # Check if stake meets minimum threshold
    stake = metagraph.S[uid].item()

    if stake < VALIDATOR_MIN_STAKE:
        logger.error(
            f"Insufficient stake for hotkey {hotkey}: {stake} < {VALIDATOR_MIN_STAKE}"
        )
        return False

    logger.info(f"Stake check passed for {hotkey} with stake {stake}")
    return True


@app.post("/upload_dataset")
async def upload_dataset(
    hotkey: str = Form(...),
    signature: str = Form(...),
    message: str = Form(...),
    files: List[UploadFile] = File(...),
):
    try:
        if not signature.startswith("0x"):
            raise HTTPException(
                status_code=401, detail="Invalid signature format, must be hex."
            )

        # Reject callers whose signature does not verify, or whose hotkey is
        # not a sufficiently staked validator on the metagraph.
        if not verify_signature(hotkey, signature, message):
            raise HTTPException(status_code=401, detail="Invalid signature.")
        if not check_stake(hotkey):
            raise HTTPException(status_code=401, detail="Insufficient stake.")

        session = aioboto3.Session(region_name=AWS_REGION)
        async with session.resource("s3") as s3:
            bucket = await s3.Bucket(BUCKET_NAME)
            for file in files:
                content = await file.read()
                file_size = len(content)
                if file_size > MAX_CHUNK_SIZE_MB * 1024 * 1024:  # MAX_CHUNK_SIZE_MB in bytes
                    raise HTTPException(
                        status_code=413,
                        detail=f"File too large. Maximum size is {MAX_CHUNK_SIZE_MB}MB",
                    )

                await bucket.put_object(
                    Key=file.filename,
                    Body=content,
                )
    except HTTPException:
        # Re-raise HTTP errors as-is instead of collapsing them into a 500 below.
        raise
    except Exception as e:
        logger.error(f"Error uploading dataset: {e}")
        raise HTTPException(status_code=500, detail=f"Error uploading dataset: {e}")

    return {
        "success": True,
        "message": "Files uploaded successfully",
        "filenames": [file.filename for file in files],
    }


async def server():
    config = uvicorn.Config(app, host="0.0.0.0", port=9999)
    server = uvicorn.Server(config)
    await server.serve()


async def test_endpoint():
    # Create test data
    test_data = {
        "hotkey": "asdfg",
        "signature": "0xasdfg",
        "message": "<Bytes>On 2024-12-02 18:15:23.663947 +08 Tensorplex is awesome</Bytes>",
    }
    # Create a temporary test file
    test_filename = "dataset_20241202.jsonl"

    # Build form data similar to how dojo.py does it
    files = []

    # Add file to form data if it exists
    if os.path.exists(test_filename):
        async with aiofiles.open(test_filename, "rb") as f:
            file_content = await f.read()
            files.append(("files", (test_filename, file_content, "application/json")))
    else:
        raise FileNotFoundError(f"Test file {test_filename} not found")

    # Make request using httpx; the service above listens on port 9999
    async with httpx.AsyncClient() as client:
        response = await client.post(
            "http://localhost:9999/upload_dataset",
            data={
                "hotkey": test_data["hotkey"],
                "signature": test_data["signature"],
                "message": test_data["message"],
            },
            files=files,
            timeout=30.0,
        )
        print(f"Status: {response.status_code}")
        print(f"Response: {response.json()}")


if __name__ == "__main__":
    import sys

    if "--test" in sys.argv:
        asyncio.run(test_endpoint())
    else:
        asyncio.run(server())
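
The endpoint only accepts signatures as 0x-prefixed hex over the exact message string, which verify_signature then checks against the sender's hotkey. A minimal client-side sketch of producing the three form fields with substrateinterface (the mnemonic is a placeholder and sign_upload_message is an illustrative helper, not part of this commit):

# Sketch: building the hotkey/signature/message form fields expected by
# /upload_dataset. The mnemonic below is a placeholder, not a real key.
from substrateinterface import Keypair


def sign_upload_message(mnemonic: str, message: str) -> dict:
    keypair = Keypair.create_from_mnemonic(mnemonic, ss58_format=42)
    # keypair.sign returns raw bytes; the service expects a "0x"-prefixed
    # hex string, which Keypair.verify decodes back to bytes.
    signature = "0x" + keypair.sign(message).hex()
    return {
        "hotkey": keypair.ss58_address,
        "signature": signature,
        "message": message,
    }

The returned dict can be passed directly as the data= form fields in the httpx call shown in test_endpoint.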
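The MAX_CHUNK_SIZE_MB check means the service rejects any single file above 50 MB by default, so a client with a larger dataset would need to split it into chunks before calling /upload_dataset. A sketch of one way to do that for JSONL, where line boundaries are safe split points; the split_jsonl helper and its .partN naming scheme are assumptions, not part of this commit:

# Sketch: splitting a large JSONL dataset into chunks that fit under the
# service's per-file limit. A single line larger than the limit still
# produces an oversized chunk; handling that is out of scope here.
import os

MAX_CHUNK_BYTES = int(os.getenv("MAX_CHUNK_SIZE_MB", 50)) * 1024 * 1024


def split_jsonl(path: str) -> list[str]:
    chunk_paths: list[str] = []
    buffer: list[bytes] = []
    size = 0

    def flush() -> None:
        nonlocal buffer, size
        if not buffer:
            return
        chunk_path = f"{path}.part{len(chunk_paths)}"
        with open(chunk_path, "wb") as out:
            out.writelines(buffer)
        chunk_paths.append(chunk_path)
        buffer, size = [], 0

    with open(path, "rb") as f:
        for line in f:  # JSONL: one record per line
            if size + len(line) > MAX_CHUNK_BYTES:
                flush()
            buffer.append(line)
            size += len(line)
    flush()
    return chunk_paths

Each returned chunk path can then be posted as a separate entry in the files form field, mirroring how test_endpoint builds its request.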
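On the consumer side, uploaded chunks land as flat keys in S3_BUCKET_NAME. A sketch of pulling them back down with the same aioboto3 resource API the service uses; the output directory and download_dataset helper are illustrative assumptions:

# Sketch: downloading previously uploaded dataset chunks from the same
# bucket the service writes to. Output directory name is illustrative.
import asyncio
import os

import aioboto3

AWS_REGION = os.getenv("AWS_REGION")
BUCKET_NAME = os.getenv("S3_BUCKET_NAME")


async def download_dataset(out_dir: str = "downloads") -> None:
    os.makedirs(out_dir, exist_ok=True)
    session = aioboto3.Session(region_name=AWS_REGION)
    async with session.resource("s3") as s3:
        bucket = await s3.Bucket(BUCKET_NAME)
        # Iterate every object in the bucket and mirror it locally.
        async for obj in bucket.objects.all():
            target = os.path.join(out_dir, os.path.basename(obj.key))
            await bucket.download_file(obj.key, target)


if __name__ == "__main__":
    asyncio.run(download_dataset())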