Skip to content

Commit

Permalink
Merge pull request LeelaChessZero#17 from LeelaChessZero/master
Browse files Browse the repository at this point in the history
get latest
  • Loading branch information
ankan-ban authored Jun 16, 2019
2 parents 792c6f3 + 4483924 commit f271c2c
Show file tree
Hide file tree
Showing 313 changed files with 116,555 additions and 33 deletions.
3 changes: 2 additions & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ environment:
- NAME: cuda
- NAME: opencl
- NAME: blas
clone_folder: c:\projects\lc0
install:
- cmd: set CUDA=false
- cmd: set OPENCL=false
Expand Down Expand Up @@ -41,7 +42,7 @@ install:
- cmd: IF NOT EXIST c:\cache\protobuf\ msbuild INSTALL.vcxproj /p:Configuration=Release /p:Platform=x64 /m
- cmd: set PATH=c:\cache\protobuf\bin;%PATH%
- cmd: IF NOT EXIST c:\cache\testnet appveyor DownloadFile http://lczero.org/get_network?sha=7170f639ba1cdc407283b8e52377283e36845b954788c6ada8897937637ef032 -Filename c:\cache\testnet
- cmd: IF %GTEST%==true IF NOT EXIST C:\cache\syzygy mkdir C:\cache\syzygy
- cmd: IF %GTEST%==true IF NOT EXIST C:\cache\syzygy mkdir C:\cache\syzygy
- cmd: IF %GTEST%==true cd C:\cache\syzygy
- cmd: IF %GTEST%==true IF NOT EXIST KQvK.rtbz curl --remote-name-all https://tablebase.lichess.ovh/tables/standard/3-4-5/K{P,N,R,B,Q}vK.rtb{w,z}
- cmd: IF %GTEST%==true IF NOT EXIST KQQvK.rtbz curl --remote-name-all https://tablebase.lichess.ovh/tables/standard/3-4-5/K{P,N,R,B,Q}{P,N,R,B,Q}vK.rtb{w,z}
Expand Down
4 changes: 3 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/env bash

pushd "$(dirname "$0")"

set -e

case $1 in
Expand All @@ -21,7 +23,7 @@ else
meson ${BUILDDIR} --buildtype ${BUILDTYPE} --prefix ${INSTALL_PREFIX:-/usr/local} "$@"
fi

pushd ${BUILDDIR}
cd ${BUILDDIR}

NINJA=$(awk '/ninja/ {ninja=$4} END {print ninja}' meson-logs/meson-log.txt)

Expand Down
2 changes: 0 additions & 2 deletions lc0@exe/lc0@exe.log

This file was deleted.

10 changes: 7 additions & 3 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,11 @@ if get_option('build_backends')
endif

if get_option('blas')
if get_option('mkl') and mkl_lib.found()
if get_option('eigen')
add_project_arguments('-DUSE_EIGEN', language : 'cpp')
has_blas = true

elif get_option('mkl') and mkl_lib.found()
add_project_arguments('-DUSE_MKL', language : 'cpp')
includes += include_directories(get_option('mkl_include'))
deps += [ mkl_lib ]
Expand Down Expand Up @@ -442,9 +446,9 @@ executable('lc0', 'src/main.cc',


### Tests
gtest = dependency('gtest', fallback: ['gtest', 'gtest_dep'], required: false)

if get_option('gtest') and gtest.found()
if get_option('gtest')
gtest = dependency('gtest', fallback: ['gtest', 'gtest_dep'])
lc0_lib = library('lc0_lib', files, include_directories: includes, dependencies: deps)

test('ChessBoard',
Expand Down
5 changes: 5 additions & 0 deletions meson_options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ option('mkl',
value: true,
description: 'Enable MKL BLAS support')

option('eigen',
type: 'boolean',
value: false,
description: 'Use EIGEN as a BLAS alternative')

option('accelerate',
type: 'boolean',
value: true,
Expand Down
3 changes: 2 additions & 1 deletion src/mcts/node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,8 @@ V4TrainingData Node::GetV4TrainingData(GameResult game_result,
// Prevent garbage/invalid training data from being uploaded to server.
if (total_n <= 0.0f) throw Exception("Search generated invalid data!");
// Set illegal moves to have -1 probability.
std::memset(result.probabilities, -1, sizeof(result.probabilities));
std::fill(std::begin(result.probabilities), std::end(result.probabilities), -1);
// Set moves probabilities according to their relative amount of visits.
for (const auto& child : Edges()) {
result.probabilities[child.edge()->GetMove().as_nn_index()] =
child.GetN() / total_n;
Expand Down
9 changes: 6 additions & 3 deletions src/mcts/search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ void Search::SendUciInfo() REQUIRES(nodes_mutex_) {
uci_infos.emplace_back(common_info);
auto& uci_info = uci_infos.back();
if (score_type == "centipawn") {
uci_info.score = 111.714640912 * tan(1.5620688421 * edge.GetQ(default_q));
uci_info.score = 295 * edge.GetQ(default_q) /
(1 - 0.976953126 * std::pow(edge.GetQ(default_q), 14));
} else if (score_type == "centipawn_2018") {
uci_info.score = 290.680623072 * tan(1.548090806 * edge.GetQ(default_q));
} else if (score_type == "win_percentage") {
Expand Down Expand Up @@ -598,8 +599,10 @@ EdgeAndNode Search::GetBestChildWithTemperature(Node* parent,
root_limit.end()) {
continue;
}
if (edge.GetN() + offset > max_n) max_n = edge.GetN() + offset;
if (edge.GetQ(fpu) > max_eval) max_eval = edge.GetQ(fpu);
if (edge.GetN() + offset > max_n) {
max_n = edge.GetN() + offset;
max_eval = edge.GetQ(fpu);
}
}

// No move had enough visits for temperature, so use default child criteria
Expand Down
20 changes: 19 additions & 1 deletion src/neural/blas/convolution1.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,19 @@
#include "neural/blas/convolution1.h"
#include "neural/blas/blas.h"

#ifdef USE_EIGEN
#include <Eigen/Dense>
#endif

namespace lczero {
#ifdef USE_EIGEN
template <typename T>
using EigenMatrixMap =
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
template <typename T>
using ConstEigenMatrixMap =
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
#endif

void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
const size_t output_channels, const float* input,
Expand All @@ -44,7 +56,7 @@ void Convolution1::Forward(const size_t batch_size, const size_t input_channels,

const float* batch_input = input + i * kSquares * input_channels;
float* batch_output = output + i * kSquares * output_channels;

#ifndef USE_EIGEN
cblas_sgemm(CblasRowMajor, // Row major format
CblasNoTrans, // A not transposed
CblasNoTrans, // B not transposed
Expand All @@ -59,6 +71,12 @@ void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
0.0f, // beta
batch_output, // C
kSquares); // ldc, leading rank of C
#else
auto C_mat = EigenMatrixMap<float>(batch_output, kSquares, output_channels);
C_mat.noalias() =
ConstEigenMatrixMap<float>(batch_input, kSquares, input_channels) *
ConstEigenMatrixMap<float>(weights, input_channels, output_channels);
#endif
}
}

Expand Down
39 changes: 37 additions & 2 deletions src/neural/blas/fully_connected_layer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,24 @@
#include <cassert>
#include <cmath>

#ifdef USE_EIGEN
#include <Eigen/Dense>
#endif

namespace lczero {
#ifdef USE_EIGEN
template <typename T>
using EigenVectorMap = Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, 1>>;
template <typename T>
using ConstEigenVectorMap =
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>>;
template <typename T>
using EigenMatrixMap =
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
template <typename T>
using ConstEigenMatrixMap =
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
#endif

void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
const size_t output_size,
Expand All @@ -41,11 +58,17 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
//
// rows output_size output_size input_size
//

#ifndef USE_EIGEN
cblas_sgemv(CblasRowMajor, CblasNoTrans,
// M K
(int)output_size, (int)input_size, 1.0f, weights,
(int)input_size, inputs, 1, 0.0f, outputs, 1);
#else
EigenVectorMap<float> y(outputs, output_size);
y.noalias() = ConstEigenMatrixMap<float>(weights, input_size, output_size)
.transpose() *
ConstEigenVectorMap<float>(inputs, input_size);
#endif
} else {
// more columns, matrix-matrix multiplication
//
Expand All @@ -66,7 +89,7 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
// passing a matrix A[m][n], the value should be m.
// cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
// ldb, beta, C, N);

#ifndef USE_EIGEN
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
(int)output_size, // M
(int)batch_size, // N
Expand All @@ -79,6 +102,13 @@ void FullyConnectedLayer::Forward1D(size_t batch_size, const size_t input_size,
0.0f, // beta
outputs, // C
(int)output_size); // ldc, leading rank of C
#else
auto C_mat = EigenMatrixMap<float>(outputs, output_size, batch_size);
C_mat.noalias() =
ConstEigenMatrixMap<float>(weights, input_size, output_size)
.transpose() *
ConstEigenMatrixMap<float>(inputs, input_size, batch_size);
#endif
}
if (apply_relu) {
for (size_t i = 0; i < batch_size; i++) {
Expand All @@ -104,7 +134,12 @@ float FullyConnectedLayer::Forward0D(const size_t size, const float* x,
// float cblas_sdot(const int N, const float *X, const int incX, const float
// *Y,
// const int incY);
#ifndef USE_EIGEN
return cblas_sdot((int)size, x, 1, y, 1);
#else
return ConstEigenVectorMap<float>(x, size)
.dot(ConstEigenVectorMap<float>(y, size));
#endif
}

} // namespace lczero
49 changes: 31 additions & 18 deletions src/neural/blas/network_blas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@
#include <cmath>
#include <iostream>

#ifdef USE_EIGEN
#include <Eigen/Core>
#endif

namespace lczero {
namespace {

Expand Down Expand Up @@ -142,6 +146,12 @@ void BlasComputation::ComputeBlocking() {
const auto input_channels = static_cast<size_t>(kInputPlanes);
const auto max_channels = std::max(output_channels, input_channels);

// The policy head may increase convolution max output size.
const auto max_output_channels =
(conv_policy_ && weights_.policy.biases.size() > output_channels)
? weights_.policy.biases.size()
: output_channels;

// Determine the largest batch for allocations.
const auto plane_count = planes_.size();
const auto largest_batch_size = std::min(max_batch_size_, plane_count);
Expand All @@ -167,7 +177,7 @@ void BlasComputation::ComputeBlocking() {
kSquares);

WinogradConvolution3 convolve3(largest_batch_size, max_channels,
output_channels);
max_output_channels);

std::vector<float> policy_buffer(largest_batch_size *
num_policy_input_planes * kSquares);
Expand Down Expand Up @@ -339,7 +349,9 @@ void BlasComputation::EncodePlanes(const InputPlanes& sample, float* buffer) {

BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options)
: weights_(file.weights()) {
#ifndef USE_EIGEN
int blas_cores = options.GetOrDefault<int>("blas_cores", 1);
#endif
max_batch_size_ =
static_cast<size_t>(options.GetOrDefault<int>("batch_size", 256));

Expand All @@ -352,7 +364,6 @@ BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options)
if (max_batch_size_ > kHardMaxBatchSize) {
max_batch_size_ = kHardMaxBatchSize;
}
std::cerr << "BLAS, maximum batch size set to " << max_batch_size_ << '\n';

const auto inputChannels = kInputPlanes;
const auto channels = static_cast<int>(weights_.input.biases.size());
Expand All @@ -379,44 +390,46 @@ BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options)
pol_channels, channels);
}

#ifdef USE_EIGEN
CERR << "Using Eigen version " << EIGEN_WORLD_VERSION << "."
<< EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION;
#endif

#ifdef USE_OPENBLAS
int num_procs = openblas_get_num_procs();
blas_cores = std::min(num_procs, blas_cores);
openblas_set_num_threads(blas_cores);
const char* core_name = openblas_get_corename();
const char* config = openblas_get_config();
std::cerr << "BLAS vendor: OpenBlas.\n";
std::cerr << "OpenBlas [" << config << "].\n";
std::cerr << "OpenBlas found " << num_procs << " " << core_name
<< " core(s).\n";
std::cerr << "OpenBLAS using " << blas_cores
<< " core(s) for this backend.\n";
CERR << "BLAS vendor: OpenBLAS.";
CERR << "OpenBLAS [" << config << "].";
CERR << "OpenBLAS found " << num_procs << " " << core_name << " core(s).";
CERR << "OpenBLAS using " << blas_cores << " core(s) for this backend.";
#endif

#ifdef USE_MKL
int max_procs = mkl_get_max_threads();
blas_cores = std::min(max_procs, blas_cores);
mkl_set_num_threads(blas_cores);
std::cerr << "BLAS vendor: MKL.\n";
CERR << "BLAS vendor: MKL.";
constexpr int len = 256;
char versionbuf[len];
mkl_get_version_string(versionbuf, len);
std::cerr << "MKL " << versionbuf << ".\n";
CERR << "MKL " << versionbuf << ".";
MKLVersion version;
mkl_get_version(&version);
std::cerr << "MKL platform: " << version.Platform
<< ", processor: " << version.Processor << ".\n";
std::cerr << "MKL can use up to " << max_procs << " thread(s).\n";
std::cerr << "MKL using " << blas_cores << " thread(s) for this backend.\n";
CERR << "MKL platform: " << version.Platform << ", processor: "
<< version.Processor << ".";
CERR << "MKL can use up to " << max_procs << " thread(s).";
CERR << "MKL using " << blas_cores << " thread(s) for this backend.";
#endif

#ifdef USE_ACCELERATE
std::cerr << "BLAS vendor: Apple vecLib.\n";
std::cerr << "Apple vecLib ignores blas_cores (" << blas_cores
<< ") parameter.\n";
CERR << "BLAS vendor: Apple vecLib.";
CERR << "Apple vecLib ignores blas_cores (" << blas_cores << ") parameter.";
#endif

std::cerr << "BLAS max batch size is " << max_batch_size_ << ".\n";
CERR << "BLAS max batch size is " << max_batch_size_ << ".";
}

std::unique_ptr<Network> MakeBlasNetwork(const WeightsFile& weights,
Expand Down
22 changes: 21 additions & 1 deletion src/neural/blas/winograd_convolution3.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,19 @@
#include "winograd_transform_ispc.h"
#endif

#ifdef USE_EIGEN
#include <Eigen/Dense>
#endif

namespace lczero {
#ifdef USE_EIGEN
template <typename T>
using EigenMatrixMap =
Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
template <typename T>
using ConstEigenMatrixMap =
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
#endif

WinogradConvolution3::WinogradConvolution3(const size_t max_batch_size,
const size_t max_input_layers,
Expand Down Expand Up @@ -206,7 +218,7 @@ void WinogradConvolution3::Sgemm(const size_t batch_size, const float* weights,

auto offset_v = b * batch_size * input_channels * kTiles;
auto offset_m = b * batch_size * output_channels * kTiles;

#ifndef USE_EIGEN
cblas_sgemm(CblasColMajor, // Column major format
CblasNoTrans, // A no trans
CblasNoTrans, // B no trans
Expand All @@ -220,6 +232,14 @@ void WinogradConvolution3::Sgemm(const size_t batch_size, const float* weights,
(int)input_channels, 0.0f, // ldV
&M_[offset_m], // M
(int)output_channels); // ldM
#else
auto C_mat = EigenMatrixMap<float>(&M_[offset_m], output_channels,
batch_size * kTiles);
C_mat.noalias() = ConstEigenMatrixMap<float>(
&weights[offset_u], output_channels, input_channels) *
ConstEigenMatrixMap<float>(&V_[offset_v], input_channels,
batch_size * kTiles);
#endif
}

#endif
Expand Down
Loading

0 comments on commit f271c2c

Please sign in to comment.