Merge pull request #59 from kobanium/develop

Develop
kobanium · Jan 10, 2022 · bf1ea7e · bf1ea7e
2 parents b4a5ace + c65afdc
commit bf1ea7e
Show file tree

Hide file tree

Showing 10 changed files with 279 additions and 106 deletions.
diff --git a/learn/aoba_zero_solver.prototxt b/learn/aoba_zero_solver.prototxt
@@ -2,6 +2,7 @@
 #net: "aoba_zero_256x40b.prototxt"
 #net: "aoba_zero_256x20b.prototxt"
 net: "aoba_zero_256x20b_mb128.prototxt"
+#net: "aoba_zero_256x40b_mb64.prototxt"
 
 # test_iter specifies how many forward passes the test should carry out.
 # In the case of MNIST, we have test batch size 100 and 100 test iterations,
@@ -10,36 +11,35 @@ net: "aoba_zero_256x20b_mb128.prototxt"
 # Carry out testing every 500 training iterations.
 test_interval: 500
 # The base learning rate, momentum and the weight decay of the network.
-weight_decay: 0.0002
+weight_decay: 0.0002 # 2021-03-11 again
+#weight_decay: 0.00004 # 2020-12-06
 # The learning rate policy ~/caffe/src/caffe/proto/caffe.proto
 #lr_policy: "inv" # base_lr * (1 + gamma * iter) ^ (- power)
 # "step" base_lr * gamma ^ (floor(iter / step))
 # "exp" base_lr * gamma ^ iter
 #gamma: 0.0001
 #power: 0.75
 # Display every 100 iterations
-#display: 100
-display: 50
+display: 100
+#display: 50
 # snapshot intermediate results
-snapshot: 20000
+snapshot: 2000000
 snapshot_prefix: "snapshots/"
 # solver mode: CPU or GPU
 solver_mode: GPU
 #solver_mode: CPU
 #solver_type: ADAGRAD # default = SGD=0, NESTEROV=1, ADAGRAD=2
 
-base_lr: 0.0002 # training at a learning rate of 0.01 = 1e-2
+base_lr: 0.000002 # training at a base learning rate of 0.000002 = 2e-6
 
 lr_policy: "step" # learning rate policy: drop the learning rate in "steps"
  # by a factor of gamma every stepsize iterations
 
 gamma: 0.5 # drop the learning rate by a factor of 2
  # (i.e., multiply it by a factor of gamma = 0.5)
 
-#iter_size: 32  # batch_size: 128 with iter_size: 32 and batch_size: 4096 with iter_size: 1 are equivalent. except batch normalization.
+stepsize: 100000000 # drop the learning rate every 100M iterations
 
-stepsize: 10000000 # drop the learning rate every 100K iterations
-
-max_iter: 10010000 # train for 700K iterations total
+max_iter: 100010000 # train for 100.01M iterations total
 
 momentum: 0.9
diff --git a/learn/yss.cpp b/learn/yss.cpp
@@ -435,6 +435,7 @@ void debug_print(const char *fmt, ... )
  va_end(ap);
  static char text_out[TMP_BUF_LEN*2];
  sprintf(text_out,"%s%s",debug_str,text);
+ PRT_ON();
  PRT("%s\n",text_out);
  debug();
 }
@@ -896,6 +897,7 @@ int shogi::LoadCSA()
  char c;
  int prt_flag = 1;
  int fShortCSA = 0; // 盤面を座標で指定する詰将棋用
+ char sIndex[256];
 
  tesuu = 0;
  hirate_ban_init(KomaOti); // 盤面の初期化　平手の状態へ
@@ -933,18 +935,6 @@ int shogi::LoadCSA()
  prt_flag = 0;
  ban_saikousei(); // 盤面の再構成。
  check_kn(); // 盤面の状態が正常化チェック
-/*
- if ( fShinpo == 2 ) { // 吉村さん問題集の場合
- ReadOneLine(lpLine); 
- ReadOneLine(lpLine);
- ReadOneLine(lpLine);
- ReadOneLine(lpLine);
- ReadOneLine(lpLine);
- PRT("正解=%s",lpLine);
- strcpy(sYoshi660Seikai,lpLine);
- break;
- }
-*/
  }
 
  // csa形式のコメントを取り込む
@@ -954,6 +944,7 @@ int shogi::LoadCSA()
 // for (i=0;i<n;i++) PRT("%c",lpLine[i]);
  if ( tesuu == 0 ) {
  if ( strncmp(lpLine,"'no",3)==0 ) {
+ strncpy(sIndex,lpLine,255);
  }
  if ( strncmp(lpLine,"'w ",3)==0 ) {
  char *p = strchr(lpLine,',');
@@ -975,12 +966,13 @@ int shogi::LoadCSA()
  char *p = lpLine + 1;
  int count = 0, all_visit = 0, sum_visit = 0;
  int b0 = 0,b1 = 0;
+ bool has_root_score = false;
  for (;;) {
  char c;
  char str[10];
  int n = 0;
  for (;;) {
- if ( n>=10 ) { PRT("Err csa move str >= 10.\n"); debug(); }
+ if ( n>=10 ) { PRT("Err csa move str >= %d,w=%d,%s\n",n,pz->weight_n,sIndex); debug(); }
  c = *p++;
  str[n++] = c;
  if ( c==',' || c=='\r' || c =='\n' || c==0 ) break;
@@ -989,9 +981,15 @@ int shogi::LoadCSA()
  if ( count==0 ) {
  if ( strstr(str,"v=") ) {
  count--;
+ float score = atof(str+2);
+ int s = (int)(score * 10000);
+ if ( s < 0 || s > 10000 ) DEBUG_PRT("Err s=%d,v=%s\n",s,str);
+ pz->v_score_x10k.push_back((unsigned short)s);
+ has_root_score = true;
  } else {
  all_visit = atoi(str);
  pz->v_playouts_sum.push_back(all_visit);
+ if ( has_root_score == false ) pz->v_score_x10k.push_back(NO_ROOT_SCORE);
  }
  } else {
  if ( (count&1)== 0 ) {
@@ -1183,6 +1181,7 @@ P-00AL
  sum += pz->vv_move_visit[i].size();
  }
  PRT("moves=%d,result=%d, mv_sum=%d,%.1f\n",pz->moves,pz->result,sum, (double)sum/(tesuu+0.00001f));
+ if ( pz->result_type == RT_NONE ) DEBUG_PRT("");
 #endif
  break; // 読み込み終了
  }