Skip to content

Commit

Permalink
Merge pull request #50 from kobanium/develop
Browse files Browse the repository at this point in the history
sente mate bug fix.
  • Loading branch information
yssaya authored Dec 11, 2020
2 parents cf845c6 + c39d4ef commit cf639aa
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 21 deletions.
1 change: 1 addition & 0 deletions learn/reduce.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.0
1 change: 1 addition & 0 deletions learn/sleep.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
120
8 changes: 6 additions & 2 deletions learn/yss.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -987,8 +987,12 @@ int shogi::LoadCSA()
}
str[n-1] = 0;
if ( count==0 ) {
all_visit = atoi(str);
pz->v_playouts_sum.push_back(all_visit);
if ( strstr(str,"v=") ) {
count--;
} else {
all_visit = atoi(str);
pz->v_playouts_sum.push_back(all_visit);
}
} else {
if ( (count&1)== 0 ) {
if ( b0==0 && b1==0 ) debug();
Expand Down
62 changes: 48 additions & 14 deletions learn/yss_dcnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2447,13 +2447,13 @@ ZERO_DB zdb_one;
ZERO_DB zdb[ZERO_DB_SIZE];
int *pZDBsum = NULL;
int zdb_count = 0;
int zdb_count_start = 5200000; // 400万棋譜から読み込む場合は4000000
int zdb_count_start = 0; //23400000; //20300000; //18800000; //16400000; //10300000; //5200000; // 400万棋譜から読み込む場合は4000000
int zero_kif_pos_num = 0;
int zero_kif_games = 0;
const int MINI_BATCH = 128; // aoba_zero.prototxt の cross_entroy_scale も同時に変更すること!layerのnameも要変更
const int ONE_SIZE = DCNN_CHANNELS*B_SIZE*B_SIZE; // 362*9*9; *4= 117288 *64 = 7506432, 7MBにもなる mini_batch=64

const int fReplayLearning = 0; // すでに作られた棋譜からWindowをずらせて学習させる
const int fReplayLearning = 1; // すでに作られた棋譜からWindowをずらせて学習させる
const int fWwwSample = 0; // fReplayLearning も同時に1

//const char ZERO_KIF_DB_FILENAME[] = "zerokif.db";
Expand Down Expand Up @@ -2481,7 +2481,7 @@ const int USE_XZ_NONE = 0;
const int USE_XZ_POOL_ONLY = 1;
const int USE_XZ_BOTH = 2;

const int USE_XZ = USE_XZ_POOL_ONLY; // 1...poolのみ xz で。2...poolもarchiveも xz で
const int USE_XZ = USE_XZ_BOTH; // 1...poolのみ xz で。2...poolもarchiveも xz で

// archiveから棋譜番号=n の棋譜を取り出す。KifBuf[] に入る。速度無視。fpでは100番目以降が遅すぎて無理。
int find_kif_from_archive(int search_n)
Expand Down Expand Up @@ -2790,7 +2790,19 @@ int shogi::wait_and_get_new_kif(int next_weight_n)
int add_kif_sum = 0;
for (;;) {
// call rsync
const int sleep_sec = 1200; // wait some sec
int sleep_sec = 1200; // wait some sec
FILE *fp = fopen("sleep.txt","r");
if ( fp==NULL ) {
PRT("fail open sleep.\n");
} else {
char str[TMP_BUF_LEN];
if ( fgets( str, TMP_BUF_LEN, fp ) ) {
sleep_sec = atoi(str);
}
if ( sleep_sec < 0 ) DEBUG_PRT("");
fclose(fp);
}

char str[256];
sprintf(str,"sleep %d",sleep_sec);
PRT("%s\n",str);
Expand Down Expand Up @@ -3464,20 +3476,24 @@ void start_zero_train(int *p_argc, char ***p_argv )
// const char sNet[] = "/home/yss/shogi/yssfish/snapshots/20191029/_iter_200000.caffemodel"; // w774
// const char sNet[] = "/home/yss/shogi/learn/snapshots/20191029/_iter_312.caffemodel"; // w775
// const char sNet[] = "/home/yss/shogi/learn/snapshots/20191107/_iter_3432.caffemodel"; // w786
const char sNet[] = "/home/yss/shogi/learn/snapshots/20200328/_iter_1370000.caffemodel"; // w923
// const char sNet[] = "/home/yss/shogi/learn/snapshots/20200328/_iter_1370000.caffemodel"; // w923
// const char sNet[] = "/home/yss/shogi/learn/snapshots/20200708/_iter_5260000.caffemodel"; // w1449
// const char sNet[] = "/home/yss/shogi/learn/snapshots/20200928/_iter_5970000.caffemodel"; // w2046
// const char sNet[] = "/home/yss/shogi/learn/snapshots/20201027/_iter_2440000.caffemodel"; // w2290
// const char sNet[] = "/home/yss/shogi/learn/snapshots/20201109/_iter_1520000.caffemodel"; // w2442
// const char sNet[] = "/home/yss/shogi/learn/snapshots/20201206/_iter_3070000.caffemodel"; // w2749

int next_weight_number =924; // 現在の最新の番号 +1
int next_weight_number =2750; // 現在の最新の番号 +1

net->CopyTrainedLayersFrom(sNet); // caffemodelを読み込んで学習を再開する場合
// net->CopyTrainedLayersFrom(sNet); // caffemodelを読み込んで学習を再開する場合
// load_aoba_txt_weight( net, "/home/yss/w000000000689.txt" ); // 既存のw*.txtを読み込む。*.caffemodelを何か読み込んだ後に
LOG(INFO) << "Solving ";
PRT("fReplayLearning=%d\n",fReplayLearning);

int iteration = 0; // 学習回数
int add = 0; // 追加された棋譜数
int remainder = 0;
int div = 0;
int update = 0;
int iter_weight = 0;

wait_again:
if ( fReplayLearning ) {
Expand Down Expand Up @@ -3505,7 +3521,25 @@ void start_zero_train(int *p_argc, char ***p_argv )
remainder = add - nLoop * min_n;
}

PRT("nLoop=%d,add=%d,add_mul=%.5f,MINI_BATCH=%d,kDataSize=%d,remainder=%d,iteration=%d(%d/%d)\n",nLoop,add,add_mul,MINI_BATCH,kDataSize,remainder,iteration,update,div);
const int ITER_WEIGHT_BASE = 10000*AVE_MOVES / (ITER_SIZE*MINI_BATCH); // 10000棋譜(平均128手)ごとにweightを作成
int iter_weight_limit = ITER_WEIGHT_BASE;
float reduce = 1.0; // weightは10000棋譜ごとで学習回数を10000から8000などに減らす。棋譜生成速度が速すぎるため
FILE *fp = fopen("reduce.txt","r");
if ( fp==NULL ) {
PRT("fail open reduce.\n");
} else {
char str[TMP_BUF_LEN];
if ( fgets( str, TMP_BUF_LEN, fp ) ) {
reduce = atof(str);
}
nLoop = (int)(reduce * nLoop);
iter_weight_limit = (int)(reduce * ITER_WEIGHT_BASE);
PRT("reduce=%7.4f, add=%d,nLoop=%d,iter_weight_limit=%d/%d\n",reduce,add,nLoop,iter_weight_limit,ITER_WEIGHT_BASE);
if ( reduce <= 0 || reduce > 1.0 || iter_weight_limit <= 0 ) DEBUG_PRT("");
fclose(fp);
}

PRT("nLoop=%d,add=%d,add_mul=%.3f,MINI_BATCH=%d,kDataSize=%d,remainder=%d,iteration=%d(%d/%d)\n",nLoop,add,add_mul,MINI_BATCH,kDataSize,remainder,iteration,iter_weight,iter_weight_limit);
int loop;
for (loop=0;loop<nLoop;loop++) {
static array<float, kDataSize * ONE_SIZE> input_data; // 大きいのでstaticで
Expand Down Expand Up @@ -3542,10 +3576,10 @@ void start_zero_train(int *p_argc, char ***p_argv )
// solver->Solve();
// solver->Snapshot(); // prototxt の設定で保存される
iteration++;

div = 10000*AVE_MOVES / (ITER_SIZE*MINI_BATCH); // 10000棋譜(平均128手)ごとにweightを作成
update = iteration % div;
if ( fReplayLearning==0 && update==0 ) {
iter_weight++;

if ( fReplayLearning==0 && iter_weight >= iter_weight_limit ) {
iter_weight = 0;
solver->Snapshot();
convert_caffemodel(iteration, next_weight_number);
next_weight_number++;
Expand Down
3 changes: 2 additions & 1 deletion src/usi-engine/bona/shogi.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ extern unsigned char ailast_one[512];
//#define BNZ_VER "11" // 20190709
//#define BNZ_VER "12" // 20201013
//#define BNZ_VER "13" // 20201023 resign 10%
#define BNZ_VER "14" // 20201108 declare win bug fix. fAutoResign
//#define BNZ_VER "14" // 20201108 declare win bug fix. fAutoResign
#define BNZ_VER "15" // 20201207 sente 1 mate bug fix
#define BNZ_NAME "AobaZero"

#define REP_MAX_PLY 32
Expand Down
7 changes: 3 additions & 4 deletions src/usi-engine/bona/ysszero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1123,15 +1123,17 @@ void create_node(tree_t * restrict ptree, int sideToMove, int ply, HASH_SHOGI *p
v = std::tanh(f); // -1 <= x <= +1 --> -0.76 <= x <= +0.76
// v = f;
// v = 0;
if ( sideToMove==BLACK ) v = -v;
// { static double va[2]; static int count[2]; va[sideToMove] += v; count[sideToMove]++; PRT("va[]=%10f,%10f\n",va[0]/(count[0]+1),va[1]/(count[1]+1)); }
// PRT("f=%10f,tanh()=%10f\n",f,v);
} else {
if ( move_num == 0 ) {
// get_network_policy_value() は常に先手が勝で(+1)、先手が負けで(-1)を返す。sideToMove は無関係
v = -1;
if ( sideToMove==BLACK ) v = +1; // 後手番で可能手がないなら先手の勝
} else {
v = get_network_policy_value(ptree, sideToMove, ply, phg);
}
// { PRT("ply=%2d,sideToMove=%d(BLACK=%d),move_num=%3d,v=%.5f\n",ply,sideToMove,BLACK,move_num,v); print_board(ptree); }
}
if ( sideToMove==BLACK ) v = -v;

Expand Down Expand Up @@ -1214,7 +1216,6 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply)
}
if ( select < 0 ) {
float v = -1;
if ( sideToMove==BLACK ) v = -1;
// PRT("no legal move. mate? ply=%d,child_num=%d,v=%.0f\n",ply,child_num,v);
UnLock(phg->entry_lock);
return v;
Expand Down Expand Up @@ -1329,10 +1330,8 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply)
if ( flag_sennitite != SENNITITE_NONE ) {
// 先手(WHITE)なら 勝=+1 負=-1, 後手(BLACK)なら 勝=+1 負=-1。Bonanzaの内部のblack,whiteは逆
win = 0;
if ( sideToMove==BLACK ) win = 0; // draw
if ( flag_sennitite == SENNITITE_WIN ) {
win = +1.0;
if ( sideToMove==BLACK ) win = +1.0;
}
// PRT("flag_sennitite=%d, win=%.1f, ply=%d\n",flag_sennitite,win,ply);
skip_search = 1;
Expand Down

0 comments on commit cf639aa

Please sign in to comment.