Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
# Conflicts:
#	DeepCrazyhouse/configs/train_config.py
#	DeepCrazyhouse/src/preprocessing/pgn_converter_util.py
#	DeepCrazyhouse/src/preprocessing/pgn_to_planes_converter.py
#	DeepCrazyhouse/src/training/train_cnn.ipynb
#	DeepCrazyhouse/src/training/trainer_agent_pytorch.py
#	engine/src/agents/agent.cpp
#	engine/src/agents/agent.h
#	engine/src/agents/mctsagent.cpp
#	engine/src/agents/mctsagent.h
#	engine/src/agents/rawnetagent.cpp
#	engine/src/agents/rawnetagent.h
#	engine/src/environments/chess_related/board.cpp
#	engine/src/environments/chess_related/boardstate.h
#	engine/src/nn/neuralnetapi.cpp
#	engine/src/nn/neuralnetapiuser.cpp
#	engine/src/nn/neuralnetapiuser.h
#	engine/src/searchthread.cpp
#	engine/src/searchthread.h
#	engine/src/uci/crazyara.cpp
  • Loading branch information
HelpstoneX committed Sep 23, 2024
2 parents 40b15ff + 09b5b5a commit ab8a7a7
Show file tree
Hide file tree
Showing 58 changed files with 2,963 additions and 478 deletions.
8 changes: 3 additions & 5 deletions DeepCrazyhouse/configs/main_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
@author: queensgambit
Main Config definition file used for the project.
Copy and rename this file to main_config.py and adjust the paths accordingly.
Adjust the paths accordingly.
"""


Expand All @@ -23,8 +23,6 @@


main_config = {
# Copy and rename this file to main_config.py

# Crazyhouse - LichessDB
# The training directory includes games from the months: 2016-01 - 2018-07 (excluding 2018-04 and 2018-08)
# The validation directory includes games from the month: 2018-04
Expand Down Expand Up @@ -55,7 +53,7 @@
"planes_mate_in_one_dir": default_dir + f"planes/{phase_definition}/phase{phase}/mate_in_one/",

# The rec directory contains the plane representation which are used in the training loop of the network
# use the the notebook create_rec_dataset to generate the .rec files:
# use the notebook create_rec_dataset to generate the .rec files:
# (Unfortunately, when trying to start training with the big dataset, a memory overflow occurred.
# Therefore, the old working solution was used to train the latest model by loading the dataset via batch files.)
# "train.idx", "val.idx", "test.idx", "mate_in_one.idx", "train.rec", "val.rec", "test.rec", "mate_in_one.rec"
Expand Down Expand Up @@ -83,7 +81,7 @@
# Active mode for different input & output representations.
# Each mode is only compatible with a certain network input-/output representation:
# Available modes: 0: MODE_CRAZYHOUSE (crazyhouse only mode, no 960) available versions [1, 2, 3]
# 1: MODE_LICHESS (all available lichess variants) available versions [1, 2 (last_moves)]
# 1: MODE_LICHESS (all available lichess variants) available versions [1, 2 (last_moves), 3 (last_moves+fx-features)]
# 2: MODE_CHESS (chess only mode, with 960) available versions [1, 2, 3]
"mode": 2,
"version": 3,
Expand Down
57 changes: 57 additions & 0 deletions DeepCrazyhouse/configs/model_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
@file: model_config.py
Created on 05.12.23
@project: CrazyAra
@author: queensgambit
Model configuration file
"""

from dataclasses import dataclass


@dataclass
class ModelConfig:
    """Class which stores all model configurations.
    Note not all models support all parameters.

    The scalar hyper-parameters are annotated dataclass fields with plain defaults.
    The per-block lists (kernels, se_types, act_types, use_transformers) are kept as
    un-annotated class-level templates holding the uniform default for every block,
    because dataclasses reject list defaults on annotated fields.
    __init__ copies these templates onto the instance before applying the per-block
    overrides, so the class-level templates are never mutated or shared.
    """

    # all models
    channels: int = 256
    channels_value_head: int = 8
    channels_policy_head: int = 81
    value_fc_size: int = 256

    # resnet
    num_res_blocks: int = 19

    # risev2, risev3, alphavile
    channels_operating_init: int = 224
    channel_expansion: int = 32

    # one entry per block; the length of kernels drives the length of the other lists
    kernels = [3] * 15
    se_types = [None] * len(kernels)
    act_types = ['relu'] * len(kernels)

    # alphavile
    use_transformers = [False] * len(kernels)

    path_dropout: float = 0.05
    kernel_5_channel_ratio: float = 0.5

    # nextvit
    stage3_repeat: int = 1

    def __init__(self):
        """Creates a configuration with the default per-block overrides applied."""
        # Work on per-instance copies: writing into the class-level lists would leak
        # the overrides into the class itself and into every other instance.
        self.kernels = list(self.kernels)
        self.se_types = list(self.se_types)
        self.act_types = list(self.act_types)
        self.use_transformers = list(self.use_transformers)

        # blocks which use a 5x5 kernel instead of the default 3x3 kernel
        for block_idx in (7, 11, 12, 13):
            self.kernels[block_idx] = 5

        # blocks which use the "eca_se" squeeze-excitation type
        for block_idx in (5, 8, 12, 13, 14):
            self.se_types[block_idx] = "eca_se"

        # blocks which are flagged to use a transformer
        for block_idx in (7, 14):
            self.use_transformers[block_idx] = True
182 changes: 130 additions & 52 deletions DeepCrazyhouse/configs/train_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,120 +11,198 @@

@dataclass
class TrainConfig:
"""Class which stores all training configuration"""
"""Class which stores all training configurations"""

# div factor is a constant which can be used to reduce the batch size and learning rate respectively
# use a value higher 1 if you encounter memory allocation errors
div_factor: int = 2
info_div_factor: str = "div factor is a constant which can be used to reduce the batch size and learning rate" \
" respectively use a value higher 1 if you encounter memory allocation errors"
div_factor: int = 1

# 1024 # the batch_size needed to be reduced to 1024 in order to fit in the GPU 1080Ti
# 4096 was originally used in the paper -> works slower for current GPU
# 2048 was used in the paper Mastering the game of Go without human knowledge and fits in GPU memory
# typically if you half the batch_size you should double the lr
info_batch_size: str = "batch size used during training. The batch-size may need to be reduced in order to fit on" \
" your GPU memory. 4096 was originally used in the paper, 2048 was used in the paper" \
" 'Mastering the game of Go without human knowledge'. Typically if you half the batch_size" \
" you should double the learning rate."
batch_size: int = int(1024 / div_factor)

# batch_steps = 1000 means for example that every 1000 batches the validation set gets processed
# this defines how often a new checkpoint will be saved and the metrics evaluated
batch_steps: int = 100 * div_factor
info_batch_steps: str = "batch_steps = 1000 means for example that every 1000 batches the validation set is" \
" processed. It defines how often a new checkpoint will be saved and the metrics evaluated"
batch_steps: int = 1000 * div_factor

# set the context on CPU switch to GPU if there is one available (strongly recommended for training)
# help text for the `context` field
info_context: str = "context defines the computation device to use for training. Set the context to 'gpu' if" \
                    " there is one available, otherwise you may train on 'cpu' instead."
context: str = "gpu"

info_cpu_count: str = "cpu_count defines the number of cpu cores to use for data processing while training."
cpu_count: int = 4

info_device_id: str = "device_id sets the GPU device to use for training."
device_id: int = 0

info_discount: str = "discount describes the discounting value to use for discounting the value target " \
"until reaching the final terminal value."
discount: float = 1.0

# help text for the `dropout_rate` field
info_dropout_rate: str = "dropout_rate describes the dropout percentage as used in the neural network architecture."
dropout_rate: float = 0

# directory to write and read weight, log, onnx and other export files
info_export_dir: str = "export_dir sets the directory to write and read weights, log, onnx and other export logging" \
" files"
export_dir: str = "./"

# help text for the `export_weights` field
# (adjacent string literals are joined verbatim, so the separating space must be part of one of them)
info_export_weights: str = "export_weights is a boolean to decide if the neural network weights should be exported" \
                           " during training."
export_weights: bool = True

info_export_grad_histograms: str = "export_grad_histograms enables or disable the export of gradient diagrams " \
"during training."
export_grad_histograms: bool = True

# Decide between 'pytorch', 'mxnet' and 'gluon' style for training
# Reinforcement Learning only works with gluon and pytorch atm
# help text for the `framework` field
# (adjacent string literals are joined verbatim, so the separating space must be part of one of them)
info_framework: str = "framework sets the deep learning framework to use. Currently only 'pytorch' is available." \
                      " mxnet and gluon have been deprecated."
framework: str = 'pytorch'

# Boolean if the policy data is also defined in select_policy_from_plane representation
info_is_policy_from_plane_data: str = "is_policy_from_plane_data is a boolean to decide if the policy data is" \
" already defined in select_policy_from_plane / plane representation."
is_policy_from_plane_data: bool = False

# help text for the `log_metrics_to_tensorboard` field
# (adjacent string literals are joined verbatim, so the separating space must be part of one of them)
info_log_metrics_to_tensorboard: str = "log_metrics_to_tensorboard decides if the metrics should be exported with" \
                                       " tensorboard."
log_metrics_to_tensorboard: bool = True

# k_steps_initial defines how many steps have been trained before
# (k_steps_initial != 0 if you continue training from a checkpoint)
info_model_type: str = "model_type defines the Model type that used during training (e.g. resnet, vit, risev2," \
" risev3, alphavile, alphavile-tiny, alphavile-small, alphavile-normal, alphavile-large," \
" NextViT)"
model_type: str = "resnet"

info_k_steps_initial: str = "k_steps_initial defines how many steps have been trained before (k_steps_initial != 0 if" \
" you continue training from a checkpoint)" \
" (TODO: Continuing training from a previous checkpoint is currently not available in" \
" pytorch training loop.)"
k_steps_initial: int = 0
# these are the weights to continue training with
# symbol_file = 'model_init-symbol.json' # model-1.19246-0.603-symbol.json'
# tar_file = 'model_init-0000.params' # model-1.19246-0.603-0223.params'
symbol_file: str = ''

info_tar_file: str = "tar_file is the neural network weight file to continue training with"
tar_file: str = ''

# # optimization parameters
# help text for the `optimizer_name` field
# (adjacent string literals are joined verbatim, so the separating space must be part of one of them)
info_optimizer_name: str = "optimizer_name is the optimizer that is used in the training loop to update the weights." \
                           " (e.g. nag, sgd, adam, adamw)"
optimizer_name: str = "nag"
max_lr: float = 0.1 / div_factor # 0.35 / div_factor
min_lr: float = 0.00001 / div_factor # 0.2 / div_factor # 0.00001

info_max_lr: str = "max_lr defines the maximum learning rate used for training."
max_lr: float = 0.07 / div_factor
info_min_lr: str = "min_lr defines the minimum learning rate used for training."
min_lr: float = 0.00001 / div_factor

if "adam" in optimizer_name:
max_lr = 0.001001 # 1e-3
min_lr = 0.001

# help text for the `max_momentum` field
# (adjacent string literals are joined verbatim, so the separating space must be part of one of them)
info_max_momentum: str = "max_momentum defines the maximum momentum factor used during training (only applicable to" \
                         " optimizers that are momentum based)"
max_momentum: float = 0.95
# help text for the `min_momentum` field
# (adjacent string literals are joined verbatim, so the separating space must be part of one of them)
info_min_momentum: str = "min_momentum defines the minimum momentum factor used during training (only applicable to" \
                         " optimizers that are momentum based)"
min_momentum: float = 0.8
# stop training as soon as max_spikes has been reached

info_max_spikes: str = "max_spikes defines the maximum number of spikes. Training is stopped as soon as max_spikes" \
" has been reached."
max_spikes: int = 20

# name initials which are used to identify running training processes with rtpt
# prefix for the process name in order to identify the process on a server
info_name_initials: str = "name_initials sets the name initials which are used to identify running training" \
" processes with rtpt. It is used as a prefix for the process name in order to identify" \
" the process on a server."
name_initials: str = "XX"

info_nb_parts: str = "nb_parts sets the number of training zip files used for training. This value is normally " \
"dynamically set before training based on the number of .zip files available in the training " \
"directory."
nb_parts: int = None

info_normalize: str = "normalize decides if the training data should be normalized to the range of [0,1]."
normalize: bool = True # define whether to normalize input data to [01]

# how many epochs the network will be trained each time there is enough new data available
nb_training_epochs: int = 1
info_nb_training_epochs: str = "nb_training_epochs defines how many epoch iterations the network will be trained."
nb_training_epochs: int = 7

policy_loss_factor: float = 0.5 # 0.99
info_plys_to_end_loss_factor: str = "plys_to_end_loss_factor defines the gradient scaling for the plys to end" \
" output."
plys_to_end_loss_factor: float = 0.002

# gradient scaling for the plys to end output
plys_to_end_loss_factor: float = 0.1
info_q_value_ratio: str = "q_value_ratio defines the ratio for mixing the value return with the corresponding " \
"q-value for a ratio of 0 no q-value information will be used."
q_value_ratio: float = 0.0

# ratio for mixing the value return with the corresponding q-value
# for a ratio of 0 no q-value information will be used
q_value_ratio: float = 0.15

# set a specific seed value for reproducibility
info_seed: str = "seed sets a specific seed value for reproducibility."
seed: int = 42

# Boolean if potential legal moves will be selected from final policy output
# help text for the `select_policy_from_plane` field (the text must name the field exactly)
info_select_policy_from_plane: str = "select_policy_from_plane defines if potential legal moves will be selected" \
                                     " from final policy output in plane representation / convolution " \
                                     "representation rather than a flat representation."
select_policy_from_plane: bool = True

# define spike threshold when the detection will be triggered
info_spike_thresh: str = "spike_thresh defines the spike threshold when the detection will be triggered. It is" \
" triggered when last_loss x spike_thresh < current_loss."
spike_thresh: float = 1.5

# Boolean if the policy target is one-hot encoded (sparse=True) or a target distribution (sparse=False)
sparse_policy_label: bool = False
info_sparse_policy_label: str = "sparse_policy_label defines if the policy target is one-hot encoded (sparse=True)" \
" or a target distribution (sparse=False)"
sparse_policy_label: bool = True

# total of training iterations
# help text for the `total_it` field
info_total_it: str = "total_it defines the total number of training iterations. Usually this value is determined" \
                     " dynamically based on the number of zip files and the number of samples in the validation file."
total_it: int = None

# adds a small mlp to infer the value loss from wdl and plys_to_end_output
info_use_custom_architecture: str = "use_custom_architecture decides if a custom network architecture should be " \
"used, defined in the model_config.py file"
use_custom_architecture: bool = False

info_use_mlp_wdl_ply: str = "use_mlp_wdl_ply adds a small mlp to infer the value loss from wdl and plys_to_end" \
"_output"
use_mlp_wdl_ply: bool = False
# enables training with ply to end head
use_plys_to_end: bool = False
# enables training with a wdl head as intermediate target (mainly useful for environments with 3 outcomes)
use_wdl: bool = False
info_use_plys_to_end: str = "use_plys_to_end enables training with the plys to end head."
use_plys_to_end: bool = True
info_use_wdl: str = "use_wdl enables training with a wdl head as intermediate target (mainly useful for" \
" environments with three outcomes WIN, DRAW, LOSS)"
use_wdl: bool = True

# loads a previous checkpoint if the loss increased significantly
info_use_spike_recovery: str = "use_spike_recovery loads a previous checkpoint if the loss increased significantly."
use_spike_recovery: bool = True
# weight the value loss a lot lower than the policy loss in order to prevent overfitting
val_loss_factor: float = 0.5 # 0.01
# weight for the wdl loss
wdl_loss_factor: float = 0.4
info_val_loss_factor: str = "val_loss_factor weights the value loss a lot lower than the policy loss in order to" \
" prevent overfitting"
val_loss_factor: float = 0.01
info_policy_loss_factor: str = "policy_loss_factor defines the weighting factor for the policy loss."
policy_loss_factor: float = 0.988 if use_plys_to_end else 0.99

# weight decay
info_wdl_loss_factor: str = "wdl_loss_factor defines the weighting factor for the wdl-loss."
wdl_loss_factor: float = 0.01

info_wd: str = "wd defines the weight decay value for regularization as a measure to prevent overfitting."
wd: float = 1e-4


def rl_train_config():
    """
    Creates a TrainConfig object and overrides a subset of its default values
    (presumably the reinforcement learning setup, judging by the function name - TODO confirm).
    :return: TrainConfig object with the overridden values
    """
    config = TrainConfig()

    # batch size, evaluation interval and learning rate bounds are all derived from div_factor
    config.div_factor = 2
    scale = config.div_factor
    config.batch_size = int(1024 / scale)
    config.batch_steps = 100 * scale
    config.max_lr = 0.1 / scale
    config.min_lr = 0.00001 / scale

    # value, policy and wdl loss share the same weight,
    # which is slightly lower when the plys to end head is enabled
    shared_loss_factor = 0.499 if config.use_plys_to_end else 0.5
    config.val_loss_factor = shared_loss_factor
    config.policy_loss_factor = shared_loss_factor
    config.wdl_loss_factor = shared_loss_factor
    config.plys_to_end_loss_factor = 0.002

    config.export_grad_histograms = True
    config.nb_training_epochs = 1  # define how many epochs the network will be trained
    config.q_value_ratio = 0.15
    config.sparse_policy_label = False

    return config


@dataclass
class TrainObjects:
"""Defines training objects which must be set before the training"""
Expand Down
6 changes: 3 additions & 3 deletions DeepCrazyhouse/src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
The former python MCTS version of this project uses MXNet. To install MXNet look at
[https://mxnet.apache.org/](https://mxnet.apache.org/)

Copy and rename `DeepCrazyhouse/configs/main_config_sample.py` to `DeepCrazyhouse/configs/main_config.py`. Adjust the paths
to the correct location on your file system. The most important entries to change are `model_architecture_dir` and `model_weights_dir`.
Adjust the paths in `DeepCrazyhouse/configs/main_config.py` to the correct location on your file system.
The most important entries to change are `model_architecture_dir` and `model_weights_dir`.

## General project structure
The source code is divided into four main parts: model, training, experiments and
samples. The different parts are located in identically named folders.

The `domain` folder contains domain classes, which present the problem domain of Crazyhouse.
We define 'domain' in the sence of a data class (as it is commonly defined as
We define 'domain' in the sense of a data class (as it is commonly defined as
e.g. in the Model-View-Controller pattern).
This includes classes to load, store and (pre)process game data, as well as
classes to analyse, evaluate, or modify board positions.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def forward(self, x):

def get_alpha_zero_model(args):
"""
Wrapper definition for AlphaVile models
Wrapper definition for the AlphaZero model
:param args: Argument dictionary
:return: pytorch model object
"""
Expand Down
Loading

0 comments on commit ab8a7a7

Please sign in to comment.