Skip to content

Commit

Permalink
Finished DataProcessor
Browse files Browse the repository at this point in the history
  • Loading branch information
caymansimpson committed Aug 29, 2024
1 parent c80d976 commit 801d9dd
Show file tree
Hide file tree
Showing 11 changed files with 527 additions and 557 deletions.
15 changes: 14 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# -*- coding: utf-8 -*-
import os
import pickle

import orjson
from pytest import fixture

# Directory that holds the test fixtures. The first assignment is a path
# relative to the current working directory; it is immediately overridden by
# the second, which anchors the path to the directory containing this file.
# NOTE(review): the first assignment looks like the pre-change side of a diff;
# confirm that only the second one is meant to remain.
FIXTURE_DIR = os.path.join("data/fixture")
FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data/fixture")


@fixture
Expand Down Expand Up @@ -83,3 +84,15 @@ def uturn_logs():
continue
events.append(orjson.loads(clean_logs(line)))
return events


@fixture
def vgc_battle_p1():
    """Return the object unpickled from ``vgcp1battle.pickle`` in FIXTURE_DIR.

    The pickle is a checked-in test fixture (presumably a VGC battle seen from
    player 1's side; verify against the fixture's producer).
    """
    pickle_path = os.path.join(FIXTURE_DIR, "vgcp1battle.pickle")
    with open(pickle_path, "rb") as handle:
        payload = handle.read()
    return pickle.loads(payload)


@fixture
def vgc_battle_p2():
    """Return the object unpickled from ``vgcp2battle.pickle`` in FIXTURE_DIR.

    The pickle is a checked-in test fixture (presumably a VGC battle seen from
    player 2's side; verify against the fixture's producer).
    """
    pickle_path = os.path.join(FIXTURE_DIR, "vgcp2battle.pickle")
    with open(pickle_path, "rb") as handle:
        payload = handle.read()
    return pickle.loads(payload)
Binary file not shown.
Binary file not shown.
Binary file added data/fixture/test_battledata/test_write.pickle
Binary file not shown.
Binary file not shown.
Binary file added data/fixture/vgcp1battle.pickle
Binary file not shown.
Binary file added data/fixture/vgcp2battle.pickle
Binary file not shown.
20 changes: 7 additions & 13 deletions src/elitefurretai/model_utils/MODEL_UTILS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,11 @@ Here, we are going to build modules which we'll use to train our various models,

## Classes in this folder
1. `BattleData` -- a dataclass that stores all relevant information about a Battle. This class can be used to recreate battle objects turn-by-turn. This class is meant to expedite model-training, by storing info from the json, and being able to convert it into training data (it's a read-only class)
2. `DataProcessor` -- takes in showdown data (via json returned from showdown, ingested into poke-env) and converts these to BattleData classes.
2. `DataProcessor` -- takes in either self-play data, showdown data or anonymized showdown data and converts them into a `BattleData` that can be used for training. Also supports reading and writing compressed `BattleData` for storage.
3. `Embedder` -- prepares a battlestate for input into a neural network by translating the state into a series of integers
4. `ModelTrainer` -- TBD; will generate labels and training data for `BattleData` objects. Should contain abstract methods (e.g. `create_labels`, ...)

## How BattleData, JSON logs and Battle work
We store battle data recorded in Json in either of two formats:
1. Anonymized Showdown data
2. Our own version of data, which is similar to Showdown data with four exceptions:
- We don't store nature/iv/ev, just the mon's raw stats
- We store teampreview teams directly instead of through inputlogs
- We don't record inputlogs
- We mark each one we wrote with `eliteFurretAIGenerated`

We use `DataProcessor.json_to_battle` to convert JSONs into battles. We use this to process the JSON objects and turn them into `BattleData`, which models can use for learning.

In self-play, we can use `DataProcessor.battle_to_json` to convert our battles to json for storage/training.
## How ModelTrainer works
- battledata_to_training
- battledata_to_inference
- omniscient or non-omniscient data
30 changes: 25 additions & 5 deletions src/elitefurretai/model_utils/battle_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@

@dataclass
class BattleData:
SOURCE_EFAI = "elitefurretai"
SOURCE_SHOWDOWN_ANON = "showdown_anon"
SOURCE_SHOWDOWN = "showdown"

roomid: str
format: str

Expand All @@ -21,29 +25,45 @@ class BattleData:
p1rating: Union[int, Dict[str, int]]
p2rating: Union[int, Dict[str, int]]

# Not always four when unobservable information
p1_team: List[ObservedPokemon]
p2_team: List[ObservedPokemon]

p1_teampreview_team: List[ObservedPokemon]
p2_teampreview_team: List[ObservedPokemon]

score: List[int]
score: List[int] # Represents non-fainted mons at end of battle for each player
winner: str
end_type: str

# Turn --> Observation for that turn; these are from the perspective of one player
# and so aren't omniscient. But with the information from p1_teampreview_team and p1_team
# we can reconstruct a battle with omniscience (we need both teampreview and team because
# gender, if not explicitly assigned, will be generated at random)
observations: Dict[int, Observation]

inputs: List[str]

# Where the log initially came from. Must be one of the SOURCE_* class
# constants (SOURCE_EFAI, SOURCE_SHOWDOWN_ANON, SOURCE_SHOWDOWN). If
# showdown_anon, we don't get requests. If showdown, it's incomplete information
source: str

# Convert an ObservedPokemon into a Pokemon by copying its observed fields
# (item, level, moves, ability, stats, tera type, gender, shiny) onto a new object.
# NOTE(review): this span comes from a diff view, so it may interleave the old
# `_last_request` stats handling with the newer `_stats` handling; confirm
# which of the two is meant to remain.
@staticmethod
def observed_pokemon_to_pokemon(omon: ObservedPokemon) -> Pokemon:
# The generation is hard-coded to 6; only the species comes from `omon`.
mon = Pokemon(gen=6, species=omon.species)
mon._item = omon.item
mon._level = omon.level
mon._moves = omon.moves
mon._ability = omon.ability
stats = omon.stats # Broken right now; we don't store hp
# Make sure `_last_request` is a dict before stashing the stats under "stats".
if not mon._last_request:
mon._last_request = {}
mon._last_request["stats"] = stats
mon._terastallized_type = omon.tera_type

# Start from an empty stats dict and copy over only the integer-valued entries.
mon._stats = {}
if omon.stats:
for stat in omon.stats:
if isinstance(omon.stats[stat], int):
mon._stats[stat] = omon.stats[stat] # pyright: ignore

mon._gender = omon.gender
mon._shiny = omon.shiny
return mon
Loading

0 comments on commit 801d9dd

Please sign in to comment.