Finished DataProcessor

caymansimpson · Aug 29, 2024 · 801d9dd · 801d9dd
1 parent c80d976
commit 801d9dd
Show file tree

Hide file tree

Showing 11 changed files with 527 additions and 557 deletions.
diff --git a/conftest.py b/conftest.py
@@ -1,10 +1,11 @@
 # -*- coding: utf-8 -*-
 import os
+import pickle
 
 import orjson
 from pytest import fixture
 
-FIXTURE_DIR = os.path.join("data/fixture")
+FIXTURE_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data/fixture")
 
 
 @fixture
@@ -83,3 +84,15 @@ def uturn_logs():
                 continue
             events.append(orjson.loads(clean_logs(line)))
         return events
+
+
+@fixture
+def vgc_battle_p1():
+    with open(os.path.join(FIXTURE_DIR, "vgcp1battle.pickle"), "rb") as f:
+        return pickle.loads(f.read())
+
+
+@fixture
+def vgc_battle_p2():
+    with open(os.path.join(FIXTURE_DIR, "vgcp2battle.pickle"), "rb") as f:
+        return pickle.loads(f.read())
diff --git a/data/fixture/test_battledata/single_battle_json_anon.pickle b/data/fixture/test_battledata/single_battle_json_anon.pickle
diff --git a/data/fixture/test_battledata/test_online_showdown.pickle b/data/fixture/test_battledata/test_online_showdown.pickle
diff --git a/data/fixture/test_battledata/test_write.pickle b/data/fixture/test_battledata/test_write.pickle
diff --git a/data/fixture/test_battledata/tests_self_play.pickle b/data/fixture/test_battledata/tests_self_play.pickle
diff --git a/data/fixture/vgcp1battle.pickle b/data/fixture/vgcp1battle.pickle
diff --git a/data/fixture/vgcp2battle.pickle b/data/fixture/vgcp2battle.pickle
diff --git a/src/elitefurretai/model_utils/MODEL_UTILS.md b/src/elitefurretai/model_utils/MODEL_UTILS.md
@@ -13,17 +13,11 @@ Here, we are going to build modules which we'll use to train our various models,
 
 ## Classes in this folder
 1. `BattleData` -- a dataclass that stores all relevant information about a Battle. This class can be used to recreate battle objects turn-by-turn. This class is meant to expedite model-training, by storing info from the json, and being able to convert it into training data (it's a read-only class)
-2. `DataProcessor` -- takes in showdown data (via json returned from showdown, ingested into poke-env) and converts these to BattleData classes.
+2. `DataProcessor` -- takes in either self-play data, showdown data or anonymized showdown data and converts them into a `BattleData` that can be used for training. Also supports reading and writing compressed `BattleData` for storage.
+3. `Embedder` -- prepares a battlestate for input into a neural network by translating the state into a series of integers
+4. `ModelTrainer` -- TBD; will generate labels and training data from `BattleData` objects. Should contain abstract methods (e.g. `create_labels`, ...)
 
-## How BattleData, JSON logs and Battle work
-We store battle data recorded in Json in either of two formats:
-1. Anonymized Showdown data
-2. Our own version of data, which is similar to Showdown data with four exceptions:
-    - We dont store nature/iv/ev, just the mon's raw stats
-    - We store teampreview teams directly instead of through inputlogs
-    - We don't record inputlogs
-    - We mark each one we wrote with `eliteFurretAIGenerated`
-
-We use `DataProcessor.json_to_battle:` to convert JSONs to battle. We use this to process the json objects and turn them into `BattleData` which can be used by models for learning
-
-In self-play, we can use `DataProcessor.battle_to_json` to convert our battles to json for storage/training.
+# How ModelTrainer works
+- battledata_to_training
+- battledata_to_inference
+- omniscient or non-omniscient data
diff --git a/src/elitefurretai/model_utils/battle_data.py b/src/elitefurretai/model_utils/battle_data.py
@@ -12,6 +12,10 @@
 
 @dataclass
 class BattleData:
+    SOURCE_EFAI = "elitefurretai"
+    SOURCE_SHOWDOWN_ANON = "showdown_anon"
+    SOURCE_SHOWDOWN = "showdown"
+
     roomid: str
     format: str
 
@@ -21,29 +25,45 @@ class BattleData:
     p1rating: Union[int, Dict[str, int]]
     p2rating: Union[int, Dict[str, int]]
 
+    # Not always four entries, since some information may be unobservable
     p1_team: List[ObservedPokemon]
     p2_team: List[ObservedPokemon]
 
     p1_teampreview_team: List[ObservedPokemon]
     p2_teampreview_team: List[ObservedPokemon]
 
-    score: List[int]
+    score: List[int]  # Represents non-fainted mons at end of battle for each player
     winner: str
     end_type: str
 
+    # Turn --> Observation for that turn; these are from the perspective of one player
+    # and so aren't omniscient. But with the information from p1_teampreview_team and p1_team
+    # we can reconstruct a battle with omniscience (we need both teampreview and team because
+    # gender, if not explicitly assigned, will be generated at random)
     observations: Dict[int, Observation]
 
+    inputs: List[str]
+
+    # Where the log initially came from. Can be any of the class
+    # variables. If showdown_anon, we don't get requests. If showdown,
+    # it's incomplete information
+    source: str
+
     @staticmethod
     def observed_pokemon_to_pokemon(omon: ObservedPokemon) -> Pokemon:
         mon = Pokemon(gen=6, species=omon.species)
         mon._item = omon.item
         mon._level = omon.level
         mon._moves = omon.moves
         mon._ability = omon.ability
-        stats = omon.stats  # Broken right now; we don't stoer hp
-        if not mon._last_request:
-            mon._last_request = {}
-        mon._last_request["stats"] = stats
+        mon._terastallized_type = omon.tera_type
+
+        mon._stats = {}
+        if omon.stats:
+            for stat in omon.stats:
+                if isinstance(omon.stats[stat], int):
+                    mon._stats[stat] = omon.stats[stat]  # pyright: ignore
+
         mon._gender = omon.gender
         mon._shiny = omon.shiny
         return mon