Merge pull request #67 from kobanium/develop
Develop
yssaya authored Dec 8, 2023
2 parents 1e01885 + 1bb3934 commit b452d10
Showing 29 changed files with 1,859 additions and 172 deletions.
46 changes: 46 additions & 0 deletions learn/aoba_solver.prototxt
@@ -0,0 +1,46 @@
#net: "aoba_zero_256x40b.prototxt"
#net: "aoba_zero_256x20b.prototxt"
net: "aoba_zero_256x20b_mb128.prototxt"
#net: "aoba_zero_256x40b_mb64.prototxt"
#net: "aoba_zero_64x15b.prototxt"
#net: "aoba_64x10b_swish_mb32.prototxt"

# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
#test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
weight_decay: 0.0002 # 2021-03-11 again
#weight_decay: 0.00004 # 2020-12-06
# The learning rate policy ~/caffe/src/caffe/proto/caffe.proto
#lr_policy: "inv" # base_lr * (1 + gamma * iter) ^ (- power)
# "step" base_lr * gamma ^ (floor(iter / step))
# "exp" base_lr * gamma ^ iter
#gamma: 0.0001
#power: 0.75
# Display every 100 iterations
display: 100
#display: 50
# snapshot intermediate results
snapshot: 2000000
snapshot_prefix: "snapshots/"
# solver mode: CPU or GPU
solver_mode: GPU
#solver_mode: CPU
#solver_type: ADAGRAD # default = SGD=0, NESTEROV=1, ADAGRAD=2

base_lr: 0.000002 # base learning rate of 2e-6

lr_policy: "step" # learning rate policy: drop the learning rate in "steps"
# by a factor of gamma every stepsize iterations

gamma: 0.5 # drop the learning rate by half
# (i.e., multiply it by a factor of gamma = 0.5)

stepsize: 100000000 # drop the learning rate every 100M iterations

max_iter: 100010000 # train for about 100M iterations total

momentum: 0.9
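
For reference, Caffe's "step" policy computes the effective rate as base_lr * gamma ^ floor(iter / stepsize). A minimal sketch (plain Python, not part of the repository) with the values from this solver file; note that with stepsize at 100M and max_iter at 100.01M, the rate stays at 2e-6 until iteration 100,000,000 and is halved only for the final 10,000 iterations:

# Sketch: effective learning rate under Caffe's "step" policy,
# using the values from aoba_solver.prototxt above.
base_lr  = 0.000002
gamma    = 0.5
stepsize = 100000000

def step_lr(iteration):
    # base_lr * gamma ^ floor(iter / stepsize)
    return base_lr * gamma ** (iteration // stepsize)

print(step_lr(0))            # 2e-06
print(step_lr(100000000))    # 1e-06 (the only drop before max_iter)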
126 changes: 122 additions & 4 deletions learn/extract/ep_del_bn_scale_factor_version_short_auto.py
@@ -1,6 +1,7 @@
#http://stackoverflow.com/questions/31324739/finding-gradient-of-a-caffe-conv-filter-with-regards-to-input
#import os
#import numpy as np
#import caffe
#import sys

#from google.protobuf import text_format
@@ -14,8 +15,40 @@
#sys.exit()

caffe.set_mode_cpu()
#net = caffe.Net("aya_i49.prototxt",caffe.TEST); # OK!
#net = caffe.Net("aya_i49.caffemodel",caffe.TEST); # Err
#net = caffe.Net("aya_i49.prototxt","aya_i49.caffemodel",caffe.TEST); # OK!
#net = caffe.Net("../20160114/aya_12_128.prototxt","../20160114/_iter_700000.caffemodel",caffe.TEST); # OK!
#net = caffe.Net("/home/yss/aya/Policy_F128/aya_i49.prototxt","/home/yss/aya/Policy_F128/i49_510_0619_266101.caffemodel",caffe.TEST);
#net = caffe.Net("","",caffe.TEST);
#net = caffe.Net("/home/yss/aya/Value_F128/aya_v_predict.prototxt","/home/yss/aya/Value_F128/aya_v2k_f128.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/test/20160621_i50_v9x9_ft3/9x9_v500_F32_L11_0349_predict.prototxt","/home/yss/test/20160621_i50_v9x9_ft3/_iter_542053.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/aya/Policy_F128/19_F32_L11.prototxt","/home/yss/aya/Policy_F128/19_v_i50_0127_kgs4d_F32L11_435794.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/aya/Policy_F128/19_v_i50_F64L14_bn.prototxt","/home/yss/aya/Policy_F128/19_v_i50_0318_kgs4d_add300_F64L14bn_350000.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/aya/Policy_F128/19_49_F32L12bn_m1.prototxt","/home/yss/aya/Policy_F128/19_49_F32L12bn_m1_s735.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/aya/Policy_F128/19_49_F32L12bn_m1.prototxt","/home/yss/aya/Policy_F128/19_49_F32L12bn_m1_0422.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/aya/Policy_F128/13_v_i50_F64L14.prototxt","/home/yss/aya/Policy_F128/13_v_i50_2k_r16_75_0518_F64L14_wd_b128_ft7_200000.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/test/20180122_i362_pro_flood_F64L29_b64_1_half/yss_F64L29.prototxt","/home/yss/test/20180122_i362_pro_flood_F64L29_b64_1_half/_iter_1880000.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/test/20180225_i361_t_pro_flood_F64L29_b64_ft4/yss_F64L29.prototxt","/home/yss/test/20180225_i361_t_pro_flood_F64L29_b64_ft4/_iter_1300000.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/yssfish/i362_64x29.prototxt","/home/yss/yssfish/i362_64x29_init_iter_0.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/yssfish/i362_64x29.prototxt","/home/yss/yssfish/i362_64x29_iter_1.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/yssfish/i362_64x29.prototxt","/home/yss/yssfish/20180620_i362_64x29_iter_380000.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/prg/yssfish/f256b20.prototxt","/home/yss/prg/yssfish/snapshots/20180224_256x20b_iter_0.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/prg/yssfish/f256b40.prototxt","/home/yss/prg/yssfish/snapshots/20180224_256x40b_iter_0.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/prg/yssfish/yss_zero.prototxt","/home/yss/prg/yssfish/snapshots/20190226_64x15b_iter_0.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/prg/yssfish/aoba_zero.prototxt","/home/yss/prg/yssfish/snapshots/20190226_64x15b_policy_visit_iter_0.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/prg/yssfish/yss_zero.prototxt","/home/yss/prg/yssfish/snapshots/_iter_1.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/prg/yssfish/f64b3.prototxt","/home/yss/prg/yssfish/snapshots/20190301_64x3b_iter_0.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/test/20190306_64L29_policy_160_139_bn_relu_cut_visit_x4_47000_1/aoba_zero.prototxt","/home/yss/test/20190306_64L29_policy_160_139_bn_relu_cut_visit_x4_47000_1/_iter_910000.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/prg/yssfish/aoba_zero.prototxt","/home/yss/prg/yssfish/snapshots/_iter_0.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/prg/yssfish/aoba_zero_64x15b.prototxt","/home/yss/test/20190306_64L29_policy_160_139_bn_relu_cut_visit_x4_47000_1/_iter_910000.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/prg/yssfish/aoba_zero_64x15b.prototxt","/home/yss/test/extract/_iter_148000.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/shogi/yssfish/aoba_zero_256x20b.prototxt","/home/yss/shogi/yssfish/20190417replay_lr001_wd00002/_iter_964000.caffemodel",caffe.TEST);
#net = caffe.Net("/home/yss/shogi/yssfish/aoba_zero_256x20b.prototxt","/home/yss/shogi/yssfish/20190419replay_lr001_wd00002_100000_1018000/_iter_36000.caffemodel",caffe.TEST);
net = caffe.Net("/home/yss/shogi/yssfish/aoba_zero_256x20b.prototxt",args[1],caffe.TEST);
#net = caffe.Net("/home/yss/shogi/yssfish/aoba_zero_256x20b.prototxt",args[1],caffe.TEST);
#net = caffe.Net("/home/yss/shogi/learn/aoba_zero_256x20b_mb128.prototxt",args[1],caffe.TEST);
#net = caffe.Net("/home/yss/shogi/learn/aoba_zero_256x40b_mb64.prototxt",args[1],caffe.TEST);
net = caffe.Net("/home/yss/shogi/learn/aoba_256x20b_swish_predict.prototxt",args[1],caffe.TEST);



@@ -51,8 +84,8 @@


def short_str(s):
r = '%.6g' % s
#r = '%.3g' % s # LZ style. this is maybe ok.
#r = '%.6g' % s
r = '%.3g' % s # LZ style. this is maybe ok.
u = r
if ( r[0:2]== '0.' ) :
u = r[1:]
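
For context, a small sketch (not the repository code) of what the '%.3g' formatting plus leading-zero stripping in short_str produces; the rest of the function is elided in this hunk, so only the part shown above is illustrated:

# Sketch of the compact weight formatting shown in short_str above
# ('%.3g', then strip the leading zero of values like 0.123).
for s in (0.123456, 1.5e-05, 12.3456):
    r = '%.3g' % s
    u = r[1:] if r[0:2] == '0.' else r
    print(r, '->', u)   # 0.123 -> .123, 1.5e-05 -> 1.5e-05, 12.3 -> 12.3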
@@ -68,12 +101,48 @@ def short_str(s):
fc_sum = 0
cv_sum = 0

bf.write('2\n') # version
#bf.write('2\n') # version
bf.write('3\n') # version

n_layer = len( net.params.items() )
print n_layer
#print net.params.items()[0][0]
#print net.params.items()[1]
#print net.params.items()[2]


# multi line comment from """ to """
"""
for loop in range(n_layer):
name = net.params.items()[loop][0]
print loop, name
print net.params[name][0].data.shape
a0 = net.params[name][0].data.shape[0]
#print a0
if 'conv' in name:
a1 = net.params[name][0].data.shape[1]
#if 'fc' in name:
if ('fc' in name or 'ip' in name):
b0 = net.params[name][1].data.shape[0]
print b0
if 'bn' in name:
#print net.params[name][0].data.shape[0]
#print net.params[name][0].data.shape[1]
a1 = net.params[name][1].data.shape[0]
b0 = net.params[name][2].data.shape[0]
print loop , name, a0,a1, ":", b0
#print net.params[name][3].data.shape[0]
for i in range(a0):
d = net.params[name][0].data[i]
print i,d
for i in range(a1):
d = net.params[name][1].data[i]
print i,d
d = net.params[name][2].data[0]
print d
sys.exit()
"""

for loop in range(n_layer):
name = net.params.items()[loop][0]
#print loop , name
@@ -155,3 +224,52 @@ def short_str(s):
sys.exit()


#for v in net.params.items()
# print [(v[0].data.shape, v[1].data.shape)]

#print net.params[v][0].data.shape # >> (256, 256, 3, 3)
#print net.params['conv2_3x3_256'][0].data.shape # >> (256, 256, 3, 3)
#print net.params['conv11_3x3_256'][0].data.shape # >> (256, 256, 3, 3)
#print net.params['conv12_3x3_1_0'][0].data.shape # >> (1, 256, 3, 3)
print [(k, v[0].data.shape, v[1].data.shape) for k, v in net.params.items()]

#print net.layers
#print net.layers[0].blobs
#print net.layers[1].blobs
#print len(net.layers[1].blobs) # >> 0
#print net.layers[1].blobs[0].data.shape # Err

print len(net.blobs['data'].data[0]) # >> 49
#print net.blobs['data'].data[0]
print len(net.params['conv1_5x5_256'][0].data) # >> 256,
print net.params['conv1_5x5_256'][0].data[0][0][0][0]
#print net.params['conv1_5x5_256'][0].data # contains the weight parameters, an array of shape (256, 1, 5, 5)
print len(net.params['conv1_5x5_256'][1].data) # >> 256
#print net.params['conv1_5x5_256'][1].data # bias, 256float, contains the bias parameters, an array of shape (256,)
#print net.params['conv1_5x5_256'][2].data # Err
print len(net.params['conv2_3x3_256'][0].data) # >> 256
print len(net.params['conv2_3x3_256'][1].data) # >> 256

print 'conv12_3x3_1_0'
print len(net.params['conv12_3x3_1_0'][0].data) # >> 1
#print net.params['conv12_3x3_1_0'][0].data
print len(net.params['conv12_3x3_1_0'][1].data) # >> 1
#print net.params['conv12_3x3_1_0'][1].data
#print net.params['flat0'][0].data # Err
#print net.params['softmax0'][0].data # Err

sys.exit()

bf = open('binary.bin', 'wb')
sum = 0
for i in range(256):
for j in range(49):
for k in range(5):
for m in range(5):
d = net.params['conv1_5x5_256'][0].data[i][j][k][m]
bf.write(struct.pack("f", d))
sum += 1

bf.close()
print sum
print 'done'
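
The dump loop above is unreachable (it sits after sys.exit()), but it documents a raw float32 format: conv1_5x5_256 weights written with struct.pack. A minimal read-back sketch (not in the repository), assuming the same machine byte order and the 256 x 49 x 5 x 5 write order used there; numpy is an assumption, it is not used by the script above:

import struct
import numpy as np  # assumed here only for reshaping

with open('binary.bin', 'rb') as f:
    raw = f.read()
count = len(raw) // struct.calcsize('f')
vals = struct.unpack('%df' % count, raw)
weights = np.asarray(vals, dtype=np.float32).reshape(256, 49, 5, 5)
print(weights.shape)  # (256, 49, 5, 5)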
24 changes: 23 additions & 1 deletion learn/yss.cpp
@@ -974,6 +974,8 @@ int shogi::LoadCSA()
if ( pz->vv_move_visit.size() != (size_t)tesuu ) {
DEBUG_PRT("pz->vv_move_visit.size()=%d,tesuu=%d Err\n",pz->vv_move_visit.size(),tesuu);
}
vector <char> vc;
pz->vv_raw_policy.push_back(vc);
back_move();
char *p = lpLine + 1;
int count = 0, all_visit = 0, sum_visit = 0;
@@ -998,21 +1000,40 @@
if ( s < 0 || s > 10000 ) DEBUG_PRT("Err s=%d,v=%s\n",s,str);
pz->v_score_x10k.push_back((unsigned short)s);
has_root_score = true;
} else if ( strstr(str,"r=") ) {
count--;
float score = atof(str+2);
int s = (int)(score * 10000);
if ( s < 0 || s > 10000 ) DEBUG_PRT("Err s=%d,v=%s\n",s,str);
pz->v_rawscore_x10k.push_back((unsigned short)s);
} else {
all_visit = atoi(str);
if ( all_visit > 0xffff ) all_visit = 0xffff;
pz->v_playouts_sum.push_back(all_visit);
if ( has_root_score == false ) pz->v_score_x10k.push_back(NO_ROOT_SCORE);
}
} else {
if ( (count&1)== 0 ) {
if ( b0==0 && b1==0 ) DEBUG_PRT("");
int v = atoi(str);
if ( v==0 ) DEBUG_PRT("v=0,%s\n",str);
if ( v > 0xffff ) v = 0xffff;
sum_visit += v;
unsigned short m = (((unsigned char)b0) << 8) | ((unsigned char)b1);
int move_visit = (m << 16) | v;
pz->vv_move_visit[tesuu].push_back(move_visit);
b0 = b1 = 0;
int len = strlen(str);
if ( len > 0 ) {
char c = str[len-1];
if ( ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ) {
if ( c >= 'a' ) c = c - 'a' + 26;
else c = c - 'A';
if ( c < 0 || c > 51 ) DEBUG_PRT("str=%s\n",str);
pz->vv_raw_policy[tesuu].push_back(c);
}
}

} else {
if ( getMoveFromCsaStr(&bz, &az, &tk, &nf, str)==0 ) DEBUG_PRT("");
int c = (tesuu + fGotekara)&1;
@@ -1215,6 +1236,7 @@ P-00AL
for (i=0;i<(int)pz->vv_move_visit.size();i++) {
sum += pz->vv_move_visit[i].size();
}
if ( pz->vv_move_visit.size() != pz->vv_raw_policy.size() ) DEBUG_PRT("pz->vv_raw_policy.size()=%d\n",pz->vv_raw_policy.size());
PRT("handicap=%d,moves=%d,result=%d, mv_sum=%d,%.1f\n",pz->handicap,pz->moves,pz->result,sum, (double)sum/(tesuu+0.00001f));
if ( pz->result_type == RT_NONE ) DEBUG_PRT("");
#endif
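
As a reading aid, a small sketch (Python, not part of the repository) of the letter encoding that the new raw-policy parsing in LoadCSA() decodes from the last character of a visit-count token ('A'-'Z' map to 0-25, 'a'-'z' to 26-51):

def raw_policy_index(ch):
    # Mirrors the C++ above: uppercase letters come first, then lowercase.
    if 'A' <= ch <= 'Z':
        return ord(ch) - ord('A')
    if 'a' <= ch <= 'z':
        return ord(ch) - ord('a') + 26
    return None  # token carries no raw-policy letter

print(raw_policy_index('A'), raw_policy_index('Z'))  # 0 25
print(raw_policy_index('a'), raw_policy_index('z'))  # 26 51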
3 changes: 2 additions & 1 deletion learn/yss.h
@@ -443,7 +443,8 @@ class shogi {
int make_www_samples();
void get_piece_num_diff(bool bGoteTurn, int d[]);
void sum_pwv(double z, bool bGoteTurn, double sumd[]);

void same_pos_check();
int is_koshikake_gin(ZERO_DB *p);

// fish-related
bool is_pseudo_legalYSS(Move m, Color sideToMove);