Merge pull request LeelaChessZero#17 from LeelaChessZero/master

get latest
DanielUranga · Jun 16, 2019 · f271c2c
2 parents 792c6f3 + 4483924
commit f271c2c
Show file tree

Hide file tree

Showing 313 changed files with 116,555 additions and 33 deletions.
diff --git a/appveyor.yml b/appveyor.yml
@@ -8,6 +8,7 @@ environment:
   - NAME: cuda
   - NAME: opencl
   - NAME: blas
+clone_folder: c:\projects\lc0
 install:
 - cmd: set CUDA=false
 - cmd: set OPENCL=false
@@ -41,7 +42,7 @@ install:
 - cmd: IF NOT EXIST c:\cache\protobuf\ msbuild INSTALL.vcxproj /p:Configuration=Release /p:Platform=x64 /m
 - cmd: set PATH=c:\cache\protobuf\bin;%PATH%
 - cmd: IF NOT EXIST c:\cache\testnet appveyor DownloadFile http://lczero.org/get_network?sha=7170f639ba1cdc407283b8e52377283e36845b954788c6ada8897937637ef032 -Filename c:\cache\testnet
-- cmd: IF %GTEST%==true IF NOT EXIST C:\cache\syzygy mkdir C:\cache\syzygy 
+- cmd: IF %GTEST%==true IF NOT EXIST C:\cache\syzygy mkdir C:\cache\syzygy
 - cmd: IF %GTEST%==true cd C:\cache\syzygy
 - cmd: IF %GTEST%==true IF NOT EXIST KQvK.rtbz curl --remote-name-all https://tablebase.lichess.ovh/tables/standard/3-4-5/K{P,N,R,B,Q}vK.rtb{w,z}
 - cmd: IF %GTEST%==true IF NOT EXIST KQQvK.rtbz curl --remote-name-all https://tablebase.lichess.ovh/tables/standard/3-4-5/K{P,N,R,B,Q}{P,N,R,B,Q}vK.rtb{w,z}

diff --git a/build.sh b/build.sh
@@ -1,5 +1,7 @@
 #!/usr/bin/env bash
 
+pushd "$(dirname "$0")"
+
 set -e
 
 case $1 in
@@ -21,7 +23,7 @@ else
   meson ${BUILDDIR} --buildtype ${BUILDTYPE} --prefix ${INSTALL_PREFIX:-/usr/local} "$@"
 fi
 
-pushd ${BUILDDIR}
+cd ${BUILDDIR}
 
 NINJA=$(awk '/ninja/ {ninja=$4} END {print ninja}' meson-logs/meson-log.txt)
 

diff --git a/lc0@exe/lc0@exe.log b/lc0@exe/lc0@exe.log
diff --git a/meson.build b/meson.build
@@ -195,7 +195,11 @@ if get_option('build_backends')
   endif
 
   if get_option('blas')
-    if get_option('mkl') and mkl_lib.found()
+    if get_option('eigen')
+      add_project_arguments('-DUSE_EIGEN', language : 'cpp')
+      has_blas = true
+
+    elif get_option('mkl') and mkl_lib.found()
       add_project_arguments('-DUSE_MKL', language : 'cpp')
       includes += include_directories(get_option('mkl_include'))
       deps += [ mkl_lib ]
@@ -442,9 +446,9 @@ executable('lc0', 'src/main.cc',
 
 
 ### Tests
-gtest = dependency('gtest', fallback: ['gtest', 'gtest_dep'], required: false)
 
-if get_option('gtest') and gtest.found()
+if get_option('gtest')
+  gtest = dependency('gtest', fallback: ['gtest', 'gtest_dep'])
   lc0_lib = library('lc0_lib', files, include_directories: includes, dependencies: deps)
 
   test('ChessBoard',

diff --git a/meson_options.txt b/meson_options.txt
@@ -103,6 +103,11 @@ option('mkl',
        value: true,
        description: 'Enable MKL BLAS support')
 
+option('eigen',
+       type: 'boolean',
+       value: false,
+       description: 'Use EIGEN as a BLAS alternative')
+
 option('accelerate',
        type: 'boolean',
        value: true,

diff --git a/src/mcts/node.cc b/src/mcts/node.cc
@@ -339,7 +339,8 @@ V4TrainingData Node::GetV4TrainingData(GameResult game_result,
   // Prevent garbage/invalid training data from being uploaded to server.
   if (total_n <= 0.0f) throw Exception("Search generated invalid data!");
   // Set illegal moves to have -1 probability.
-  std::memset(result.probabilities, -1, sizeof(result.probabilities));
+  std::fill(std::begin(result.probabilities), std::end(result.probabilities), -1);
+  // Set moves probabilities according to their relative amount of visits.
   for (const auto& child : Edges()) {
     result.probabilities[child.edge()->GetMove().as_nn_index()] =
         child.GetN() / total_n;

diff --git a/src/mcts/search.cc b/src/mcts/search.cc
@@ -126,7 +126,8 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) {
     uci_infos.emplace_back(common_info);
     auto& uci_info = uci_infos.back();
     if (score_type == "centipawn") {
-      uci_info.score = 111.714640912 * tan(1.5620688421 * edge.GetQ(default_q));
+      uci_info.score = 295 * edge.GetQ(default_q) /
+                       (1 - 0.976953126 * std::pow(edge.GetQ(default_q), 14));
     } else if (score_type == "centipawn_2018") {
       uci_info.score = 290.680623072 * tan(1.548090806 * edge.GetQ(default_q));
     } else if (score_type == "win_percentage") {
@@ -598,8 +599,10 @@ EdgeAndNode Search::GetBestChildWithTemperature(Node* parent,
             root_limit.end()) {
       continue;
     }
-    if (edge.GetN() + offset > max_n) max_n = edge.GetN() + offset;
-    if (edge.GetQ(fpu) > max_eval) max_eval = edge.GetQ(fpu);
+    if (edge.GetN() + offset > max_n) {
+      max_n = edge.GetN() + offset;
+      max_eval = edge.GetQ(fpu);
+    }
   }
 
   // No move had enough visits for temperature, so use default child criteria

diff --git a/src/neural/blas/convolution1.cc b/src/neural/blas/convolution1.cc
@@ -19,7 +19,19 @@
 #include "neural/blas/convolution1.h"
 #include "neural/blas/blas.h"
 
+#ifdef USE_EIGEN
+#include <Eigen/Dense>
+#endif
+
 namespace lczero {
+#ifdef USE_EIGEN
+template <typename T>
+using EigenMatrixMap =
+    Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
+template <typename T>
+using ConstEigenMatrixMap =
+    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
+#endif
 
 void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
                            const size_t output_channels, const float* input,
@@ -44,7 +56,7 @@ void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
 
     const float* batch_input = input + i * kSquares * input_channels;
     float* batch_output = output + i * kSquares * output_channels;
-
+#ifndef USE_EIGEN
     cblas_sgemm(CblasRowMajor,         // Row major formar
                 CblasNoTrans,          // A not transposed
                 CblasNoTrans,          // B not transposed
@@ -59,6 +71,12 @@ void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
                 0.0f,                  // beta
                 batch_output,          // C
                 kSquares);             // ldc, leading rank of B
+#else
+    auto C_mat = EigenMatrixMap<float>(batch_output, kSquares, output_channels);
+    C_mat.noalias() =
+        ConstEigenMatrixMap<float>(batch_input, kSquares, input_channels) *
+        ConstEigenMatrixMap<float>(weights, input_channels, output_channels);
+#endif
   }
 }
 

diff --git a/src/neural/blas/fully_connected_layer.cc b/src/neural/blas/fully_connected_layer.cc
@@ -23,7 +23,24 @@
 #include <cassert>
 #include <cmath>
 
+#ifdef USE_EIGEN
+#include <Eigen/Dense>
+#endif
+
 namespace lczero {
+#ifdef USE_EIGEN
+template <typename T>
+using EigenVectorMap = Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1>>;
+template <typename T>
+using ConstEigenVectorMap =
+    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>>;
+template <typename T>
+using EigenMatrixMap =
+    Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
+template <typename T>
+using ConstEigenMatrixMap =
+    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
+#endif
 
 void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
                                     const size_t output_size,
@@ -41,11 +58,17 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
     //
     //   rows  output_size      output_size          input_size
     //
-
+#ifndef USE_EIGEN
     cblas_sgemv(CblasRowMajor, CblasNoTrans,
                 // M     K
                 (int)output_size, (int)input_size, 1.0f, weights,
                 (int)input_size, inputs, 1, 0.0f, outputs, 1);
+#else
+    EigenVectorMap<float> y(outputs, output_size);
+    y.noalias() = ConstEigenMatrixMap<float>(weights, input_size, output_size)
+                      .transpose() *
+                  ConstEigenVectorMap<float>(inputs, input_size);
+#endif
   } else {
     // more columns, matrix-matrix multiplication
     //
@@ -66,7 +89,7 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
     // passing a matrix A[m][n], the value should be m.
     //    cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
     //                ldb, beta, C, N);
-
+#ifndef USE_EIGEN
     cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
                 (int)output_size,   // M
                 (int)batch_size,    // N
@@ -79,6 +102,13 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
                 0.0f,               // beta
                 outputs,            // C
                 (int)output_size);  // ldc, leading rank of C
+#else
+    auto C_mat = EigenMatrixMap<float>(outputs, output_size, batch_size);
+    C_mat.noalias() =
+        ConstEigenMatrixMap<float>(weights, input_size, output_size)
+            .transpose() *
+        ConstEigenMatrixMap<float>(inputs, input_size, batch_size);
+#endif
   }
   if (apply_relu) {
     for (size_t i = 0; i < batch_size; i++) {
@@ -104,7 +134,12 @@ float FullyConnectedLayer::Forward0D(const size_t size, const float* x,
   // float cblas_sdot(const int N, const float *X, const int incX, const float
   // *Y,
   // const int incY);
+#ifndef USE_EIGEN
   return cblas_sdot((int)size, x, 1, y, 1);
+#else
+  return ConstEigenVectorMap<float>(x, size)
+      .dot(ConstEigenVectorMap<float>(y, size));
+#endif
 }
 
 }  // namespace lczero
diff --git a/src/neural/blas/network_blas.cc b/src/neural/blas/network_blas.cc
@@ -33,6 +33,10 @@
 #include <cmath>
 #include <iostream>
 
+#ifdef USE_EIGEN
+#include <Eigen/Core>
+#endif
+
 namespace lczero {
 namespace {
 
@@ -142,6 +146,12 @@ void BlasComputation::ComputeBlocking() {
   const auto input_channels = static_cast<size_t>(kInputPlanes);
   const auto max_channels = std::max(output_channels, input_channels);
 
+  // The policy head may increase convolution max output size.
+  const auto max_output_channels =
+      (conv_policy_ && weights_.policy.biases.size() > output_channels)
+          ? weights_.policy.biases.size()
+          : output_channels;
+
   // Determine the largest batch for allocations.
   const auto plane_count = planes_.size();
   const auto largest_batch_size = std::min(max_batch_size_, plane_count);
@@ -167,7 +177,7 @@ void BlasComputation::ComputeBlocking() {
                                  kSquares);
 
   WinogradConvolution3 convolve3(largest_batch_size, max_channels,
-                                 output_channels);
+                                 max_output_channels);
 
   std::vector<float> policy_buffer(largest_batch_size *
                                    num_policy_input_planes * kSquares);
@@ -339,7 +349,9 @@ void BlasComputation::EncodePlanes(const InputPlanes& sample, float* buffer) {
 
 BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options)
     : weights_(file.weights()) {
+#ifndef USE_EIGEN
   int blas_cores = options.GetOrDefault<int>("blas_cores", 1);
+#endif
   max_batch_size_ =
       static_cast<size_t>(options.GetOrDefault<int>("batch_size", 256));
 
@@ -352,7 +364,6 @@ BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options)
   if (max_batch_size_ > kHardMaxBatchSize) {
     max_batch_size_ = kHardMaxBatchSize;
   }
-  std::cerr << "BLAS, maximum batch size set to " << max_batch_size_ << '\n';
 
   const auto inputChannels = kInputPlanes;
   const auto channels = static_cast<int>(weights_.input.biases.size());
@@ -379,44 +390,46 @@ BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options)
                                                        pol_channels, channels);
   }
 
+#ifdef USE_EIGEN
+  CERR << "Using Eigen version " << EIGEN_WORLD_VERSION << "."
+       << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION;
+#endif
+
 #ifdef USE_OPENBLAS
   int num_procs = openblas_get_num_procs();
   blas_cores = std::min(num_procs, blas_cores);
   openblas_set_num_threads(blas_cores);
   const char* core_name = openblas_get_corename();
   const char* config = openblas_get_config();
-  std::cerr << "BLAS vendor: OpenBlas.\n";
-  std::cerr << "OpenBlas [" << config << "].\n";
-  std::cerr << "OpenBlas found " << num_procs << " " << core_name
-            << " core(s).\n";
-  std::cerr << "OpenBLAS using " << blas_cores
-            << " core(s) for this backend.\n";
+  CERR << "BLAS vendor: OpenBLAS.";
+  CERR << "OpenBLAS [" << config << "].";
+  CERR << "OpenBLAS found " << num_procs << " " << core_name << " core(s).";
+  CERR << "OpenBLAS using " << blas_cores << " core(s) for this backend.";
 #endif
 
 #ifdef USE_MKL
   int max_procs = mkl_get_max_threads();
   blas_cores = std::min(max_procs, blas_cores);
   mkl_set_num_threads(blas_cores);
-  std::cerr << "BLAS vendor: MKL.\n";
+  CERR << "BLAS vendor: MKL.";
   constexpr int len = 256;
   char versionbuf[len];
   mkl_get_version_string(versionbuf, len);
-  std::cerr << "MKL " << versionbuf << ".\n";
+  CERR << "MKL " << versionbuf << ".";
   MKLVersion version;
   mkl_get_version(&version);
-  std::cerr << "MKL platform: " << version.Platform
-            << ", processor: " << version.Processor << ".\n";
-  std::cerr << "MKL can use up to " << max_procs << " thread(s).\n";
-  std::cerr << "MKL using " << blas_cores << " thread(s) for this backend.\n";
+  CERR << "MKL platform: " << version.Platform << ", processor: "
+       << version.Processor << ".";
+  CERR << "MKL can use up to " << max_procs << " thread(s).";
+  CERR << "MKL using " << blas_cores << " thread(s) for this backend.";
 #endif
 
 #ifdef USE_ACCELERATE
-  std::cerr << "BLAS vendor: Apple vecLib.\n";
-  std::cerr << "Apple vecLib ignores blas_cores (" << blas_cores
-            << ") parameter.\n";
+  CERR << "BLAS vendor: Apple vecLib.";
+  CERR << "Apple vecLib ignores blas_cores (" << blas_cores << ") parameter.";
 #endif
 
-  std::cerr << "BLAS max batch size is " << max_batch_size_ << ".\n";
+  CERR << "BLAS max batch size is " << max_batch_size_ << ".";
 }
 
 std::unique_ptr<Network> MakeBlasNetwork(const WeightsFile& weights,

diff --git a/src/neural/blas/winograd_convolution3.cc b/src/neural/blas/winograd_convolution3.cc
@@ -29,7 +29,19 @@
 #include "winograd_transform_ispc.h"
 #endif
 
+#ifdef USE_EIGEN
+#include <Eigen/Dense>
+#endif
+
 namespace lczero {
+#ifdef USE_EIGEN
+template <typename T>
+using EigenMatrixMap =
+    Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
+template <typename T>
+using ConstEigenMatrixMap =
+    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
+#endif
 
 WinogradConvolution3::WinogradConvolution3(const size_t max_batch_size,
                                            const size_t max_input_layers,
@@ -206,7 +218,7 @@ void WinogradConvolution3::Sgemm(const size_t batch_size, const float* weights,
 
     auto offset_v = b * batch_size * input_channels * kTiles;
     auto offset_m = b * batch_size * output_channels * kTiles;
-
+#ifndef USE_EIGEN
     cblas_sgemm(CblasColMajor,               // Row major format
                 CblasNoTrans,                // A no trans
                 CblasNoTrans,                // B no trans
@@ -220,6 +232,14 @@ void WinogradConvolution3::Sgemm(const size_t batch_size, const float* weights,
                 (int)input_channels, 0.0f,   // ldV
                 &M_[offset_m],               // M
                 (int)output_channels);       // ldM
+#else
+    auto C_mat = EigenMatrixMap<float>(&M_[offset_m], output_channels,
+                                       batch_size * kTiles);
+    C_mat.noalias() = ConstEigenMatrixMap<float>(
+                          &weights[offset_u], output_channels, input_channels) *
+                      ConstEigenMatrixMap<float>(&V_[offset_v], input_channels,
+                                                 batch_size * kTiles);
+#endif
   }
 
 #endif