Merge pull request #50 from kobanium/develop

Fix the sente mate bug.
kobanium · Dec 11, 2020 · cf639aa · cf639aa
2 parents cf845c6 + c39d4ef
commit cf639aa
Show file tree

Hide file tree

Showing 6 changed files with 61 additions and 21 deletions.
diff --git a/learn/reduce.txt b/learn/reduce.txt
@@ -0,0 +1 @@
+1.0
diff --git a/learn/sleep.txt b/learn/sleep.txt
@@ -0,0 +1 @@
+120
diff --git a/learn/yss.cpp b/learn/yss.cpp
@@ -987,8 +987,12 @@ int shogi::LoadCSA()
  }
  str[n-1] = 0;
  if ( count==0 ) {
- all_visit = atoi(str);
- pz->v_playouts_sum.push_back(all_visit);
+ if ( strstr(str,"v=") ) {
+ count--;
+ } else {
+ all_visit = atoi(str);
+ pz->v_playouts_sum.push_back(all_visit);
+ }
  } else {
  if ( (count&1)== 0 ) {
  if ( b0==0 && b1==0 ) debug();

diff --git a/learn/yss_dcnn.cpp b/learn/yss_dcnn.cpp
@@ -2447,13 +2447,13 @@ ZERO_DB zdb_one;
 ZERO_DB zdb[ZERO_DB_SIZE];
 int *pZDBsum = NULL;
 int zdb_count = 0;
-int zdb_count_start = 5200000; // 400万棋譜から読み込む場合は4000000
+int zdb_count_start = 0; //23400000; //20300000; //18800000; //16400000; //10300000; //5200000; // 400万棋譜から読み込む場合は4000000
 int zero_kif_pos_num = 0;
 int zero_kif_games = 0;
 const int MINI_BATCH = 128; // aoba_zero.prototxt の cross_entroy_scale も同時に変更すること！layerのnameも要変更
 const int ONE_SIZE = DCNN_CHANNELS*B_SIZE*B_SIZE; // 362*9*9; *4= 117288 *64 = 7506432, 7MBにもなる mini_batch=64
 
-const int fReplayLearning = 0; // すでに作られた棋譜からWindowをずらせて学習させる
+const int fReplayLearning = 1; // すでに作られた棋譜からWindowをずらせて学習させる
 const int fWwwSample = 0; // fReplayLearning も同時に1
 
 //const char ZERO_KIF_DB_FILENAME[] = "zerokif.db";
@@ -2481,7 +2481,7 @@ const int USE_XZ_NONE = 0;
 const int USE_XZ_POOL_ONLY = 1;
 const int USE_XZ_BOTH = 2;
 
-const int USE_XZ = USE_XZ_POOL_ONLY; // 1...poolのみ xz で。2...poolもarchiveも xz で
+const int USE_XZ = USE_XZ_BOTH; // 1...poolのみ xz で。2...poolもarchiveも xz で
 
 // archiveから棋譜番号=n の棋譜を取り出す。KifBuf[] に入る。速度無視。fpでは100番目以降が遅すぎて無理。
 int find_kif_from_archive(int search_n)
@@ -2790,7 +2790,19 @@ int shogi::wait_and_get_new_kif(int next_weight_n)
  int add_kif_sum = 0;
  for (;;) {
  // call rsync
- const int sleep_sec = 1200; // wait some sec
+ int sleep_sec = 1200; // wait some sec
+ FILE *fp = fopen("sleep.txt","r");
+ if ( fp==NULL ) {
+ PRT("fail open sleep.\n");
+ } else {
+ char str[TMP_BUF_LEN];
+ if ( fgets( str, TMP_BUF_LEN, fp ) ) {
+ sleep_sec = atoi(str);
+ }
+ if ( sleep_sec < 0 ) DEBUG_PRT("");
+ fclose(fp);
+ }
+
  char str[256];
  sprintf(str,"sleep %d",sleep_sec);
  PRT("%s\n",str);
@@ -3464,20 +3476,24 @@ void start_zero_train(int *p_argc, char ***p_argv )
 // const char sNet[] = "/home/yss/shogi/yssfish/snapshots/20191029/_iter_200000.caffemodel"; // w774
 // const char sNet[] = "/home/yss/shogi/learn/snapshots/20191029/_iter_312.caffemodel"; // w775
 // const char sNet[] = "/home/yss/shogi/learn/snapshots/20191107/_iter_3432.caffemodel"; // w786
- const char sNet[] = "/home/yss/shogi/learn/snapshots/20200328/_iter_1370000.caffemodel"; // w923
+// const char sNet[] = "/home/yss/shogi/learn/snapshots/20200328/_iter_1370000.caffemodel"; // w923
+// const char sNet[] = "/home/yss/shogi/learn/snapshots/20200708/_iter_5260000.caffemodel"; // w1449
+// const char sNet[] = "/home/yss/shogi/learn/snapshots/20200928/_iter_5970000.caffemodel"; // w2046
+// const char sNet[] = "/home/yss/shogi/learn/snapshots/20201027/_iter_2440000.caffemodel"; // w2290
+// const char sNet[] = "/home/yss/shogi/learn/snapshots/20201109/_iter_1520000.caffemodel"; // w2442
+// const char sNet[] = "/home/yss/shogi/learn/snapshots/20201206/_iter_3070000.caffemodel"; // w2749
 
- int next_weight_number =924; // 現在の最新の番号 +1
+ int next_weight_number =2750; // 現在の最新の番号 +1
 
- net->CopyTrainedLayersFrom(sNet); // caffemodelを読み込んで学習を再開する場合
+// net->CopyTrainedLayersFrom(sNet); // caffemodelを読み込んで学習を再開する場合
 // load_aoba_txt_weight( net, "/home/yss/w000000000689.txt" ); // 既存のw*.txtを読み込む。*.caffemodelを何か読み込んだ後に
  LOG(INFO) << "Solving ";
  PRT("fReplayLearning=%d\n",fReplayLearning);
 
  int iteration = 0; // 学習回数
  int add = 0; // 追加された棋譜数
  int remainder = 0;
- int div = 0;
- int update = 0;
+ int iter_weight = 0;
 
 wait_again:
  if ( fReplayLearning ) {
@@ -3505,7 +3521,25 @@ void start_zero_train(int *p_argc, char ***p_argv )
  remainder = add - nLoop * min_n;
  }
 
- PRT("nLoop=%d,add=%d,add_mul=%.5f,MINI_BATCH=%d,kDataSize=%d,remainder=%d,iteration=%d(%d/%d)\n",nLoop,add,add_mul,MINI_BATCH,kDataSize,remainder,iteration,update,div);
+ const int ITER_WEIGHT_BASE = 10000*AVE_MOVES / (ITER_SIZE*MINI_BATCH); // 10000棋譜(平均128手)ごとにweightを作成
+ int iter_weight_limit = ITER_WEIGHT_BASE;
+ float reduce = 1.0; // weightは10000棋譜ごとで学習回数を10000から8000などに減らす。棋譜生成速度が速すぎるため
+ FILE *fp = fopen("reduce.txt","r");
+ if ( fp==NULL ) {
+ PRT("fail open reduce.\n");
+ } else {
+ char str[TMP_BUF_LEN];
+ if ( fgets( str, TMP_BUF_LEN, fp ) ) {
+ reduce = atof(str);
+ }
+ nLoop = (int)(reduce * nLoop);
+ iter_weight_limit = (int)(reduce * ITER_WEIGHT_BASE);
+ PRT("reduce=%7.4f, add=%d,nLoop=%d,iter_weight_limit=%d/%d\n",reduce,add,nLoop,iter_weight_limit,ITER_WEIGHT_BASE);
+ if ( reduce <= 0 || reduce > 1.0 || iter_weight_limit <= 0 ) DEBUG_PRT("");
+ fclose(fp);
+ }
+
+ PRT("nLoop=%d,add=%d,add_mul=%.3f,MINI_BATCH=%d,kDataSize=%d,remainder=%d,iteration=%d(%d/%d)\n",nLoop,add,add_mul,MINI_BATCH,kDataSize,remainder,iteration,iter_weight,iter_weight_limit);
  int loop;
  for (loop=0;loop<nLoop;loop++) {
  static array<float, kDataSize * ONE_SIZE> input_data; // 大きいのでstaticで
@@ -3542,10 +3576,10 @@ void start_zero_train(int *p_argc, char ***p_argv )
 // solver->Solve();
 // solver->Snapshot(); // prototxt の設定で保存される
  iteration++;
-
- div = 10000*AVE_MOVES / (ITER_SIZE*MINI_BATCH); // 10000棋譜(平均128手)ごとにweightを作成
- update = iteration % div;
- if ( fReplayLearning==0 && update==0 ) {
+ iter_weight++;
+
+ if ( fReplayLearning==0 && iter_weight >= iter_weight_limit ) {
+  iter_weight = 0;
  solver->Snapshot();
  convert_caffemodel(iteration, next_weight_number);
  next_weight_number++;

diff --git a/src/usi-engine/bona/shogi.h b/src/usi-engine/bona/shogi.h
@@ -118,7 +118,8 @@ extern unsigned char ailast_one[512];
 //#define BNZ_VER "11" // 20190709
 //#define BNZ_VER "12" // 20201013
 //#define BNZ_VER "13" // 20201023 resign 10%
-#define BNZ_VER "14" // 20201108 declare win bug fix. fAutoResign
+//#define BNZ_VER "14" // 20201108 declare win bug fix. fAutoResign
+#define BNZ_VER "15" // 20201207 sente 1 mate bug fix
 #define BNZ_NAME "AobaZero"
 
 #define REP_MAX_PLY 32

diff --git a/src/usi-engine/bona/ysszero.cpp b/src/usi-engine/bona/ysszero.cpp
@@ -1123,15 +1123,17 @@ void create_node(tree_t * restrict ptree, int sideToMove, int ply, HASH_SHOGI *p
  v = std::tanh(f); // -1 <= x <= +1 --> -0.76 <= x <= +0.76
 // v = f;
 // v = 0;
- if ( sideToMove==BLACK ) v = -v;
 // { static double va[2]; static int count[2]; va[sideToMove] += v; count[sideToMove]++; PRT("va[]=%10f,%10f\n",va[0]/(count[0]+1),va[1]/(count[1]+1)); }
 // PRT("f=%10f,tanh()=%10f\n",f,v);
  } else {
  if ( move_num == 0 ) {
+ // get_network_policy_value() は常に先手が勝で(+1)、先手が負けで(-1)を返す。sideToMove は無関係
  v = -1;
+ if ( sideToMove==BLACK ) v = +1; // 後手番で可能手がないなら先手の勝
  } else {
  v = get_network_policy_value(ptree, sideToMove, ply, phg);
  }
+// { PRT("ply=%2d,sideToMove=%d(BLACK=%d),move_num=%3d,v=%.5f\n",ply,sideToMove,BLACK,move_num,v); print_board(ptree); }
  }
  if ( sideToMove==BLACK ) v = -v;
 
@@ -1214,7 +1216,6 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply)
  }
  if ( select < 0 ) {
  float v = -1;
- if ( sideToMove==BLACK ) v = -1;
 // PRT("no legal move. mate? ply=%d,child_num=%d,v=%.0f\n",ply,child_num,v);
  UnLock(phg->entry_lock);
  return v;
@@ -1329,10 +1330,8 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply)
  if ( flag_sennitite != SENNITITE_NONE ) {
  // 先手(WHITE)なら 勝=+1 負=-1, 後手(BLACK)なら 勝=+1 負=-1。Bonanzaの内部のblack,whiteは逆
  win = 0;
- if ( sideToMove==BLACK ) win = 0; // draw
  if ( flag_sennitite == SENNITITE_WIN ) {
  win = +1.0;
- if ( sideToMove==BLACK ) win = +1.0;
  }
 // PRT("flag_sennitite=%d, win=%.1f, ply=%d\n",flag_sennitite,win,ply);
  skip_search = 1;