Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support increased parallelization in hw_vdf_client #155

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README_ASIC.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,11 @@ List of options [default, min - max]:
Allows connecting to a timelord running on a remote host. Useful when running multiple machines with VDF hardware connecting to a single timelord.
--vdfs-mask N - mask for enabling VDF engines [7, 1 - 7]
The ASIC has 3 VDF engines numbered 0, 1, 2. If not running all 3 engines, the mask can be specified to enable specific engines. It must be the result of bitwise OR of the engine bits (1, 2, 4 for engines 0, 1, 2).
--vdf-threads N - number of software threads per VDF engine [4, 2 - 64]
--vdf-threads N - max number of software threads per VDF engine [4, 2 - 64]
Number of software threads computing intermediate values and proofs per VDF engine.
--proof-threads N - number of proof threads per VDF engine
--proof-threads N - max number of proof threads per VDF engine
Number of software threads only computing proofs per VDF engine. Must be less than --vdf-threads.
--segment-threads N - number of proof threads per segment [2, 1 - 8]
--auto-freq-period N - auto-adjust frequency every N seconds [0, 10 - inf]
--list - list available devices and exit
```
Expand Down
74 changes: 46 additions & 28 deletions src/hw/hw_proof.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <cstdlib>
#include <unistd.h>

static const uint32_t g_chkp_thres = 1000000;
static const uint32_t g_chkp_thres = 500000;
static const uint32_t g_skip_thres = 10;

void report_bad_vdf_value(struct vdf_state *vdf, struct vdf_value *val)
Expand Down Expand Up @@ -297,13 +297,47 @@ bool hw_proof_should_queue(struct vdf_state *vdf, uint64_t iters)
return iters < vdf->proofs[last_queued_idx].iters;
}

void hw_proof_queue_proofs(struct vdf_state *vdf, uint64_t iters, uint16_t prev)
{
uint32_t s = vdf->segment_threads;
uint32_t chkp_thres = g_chkp_thres * s;
double f = 1.0 / s;
// Calculate the share of the total iterations for the first proof segment
// The formula below is for a 2-segment case
double x = (f + 1) / (2 * f + 1);

if (iters > chkp_thres) {
// At least 2 segments are needed, find length of the first segment
uint64_t chkp_iters = iters * x;
if (iters - chkp_iters > chkp_thres) {
// Three segments are needed
uint64_t chkp2_iters;
double p = f + 1.0;
double k = p / f;
// Share of the total iterations for the second segment (of 3)
double y = p / (k * p + 2.0 * f + 1.0);
x = y * k;

chkp_iters = iters * x;
chkp_iters = chkp_iters / vdf->interval * vdf->interval;
prev = hw_queue_proof(vdf, chkp_iters, prev, 0);

chkp2_iters = iters * y;
iters -= chkp_iters;
chkp_iters = chkp2_iters;
}
chkp_iters = chkp_iters / vdf->interval * vdf->interval;
prev = hw_queue_proof(vdf, chkp_iters, prev, 0);
iters -= chkp_iters;
}
hw_queue_proof(vdf, iters, prev, HW_VDF_PROOF_FLAG_IS_REQ);
}

void hw_proof_process_req(struct vdf_state *vdf)
{
uint64_t iters;
uint64_t req_iters;
uint64_t base_iters = 0;
uint64_t chkp_iters;
uint32_t chkp_div = 4, chkp_mul = 3;
uint8_t max_chkp_segments = 64 - 3;
int i;
uint16_t prev = HW_VDF_PROOF_NONE;
Expand Down Expand Up @@ -348,27 +382,7 @@ void hw_proof_process_req(struct vdf_state *vdf)

iters = req_iters - base_iters;

if (iters > g_chkp_thres) {
// Split iters as [75%, 25%]
chkp_iters = iters * chkp_mul / chkp_div;
if (iters - chkp_iters > g_chkp_thres) {
// Split iters as [69%, 23%, 8%]
uint32_t chkp2_mul[] = { 69, 69 + 23 };
uint64_t chkp2_iters;

chkp_iters = iters * chkp2_mul[0] / 100;
chkp_iters = chkp_iters / vdf->interval * vdf->interval;
prev = hw_queue_proof(vdf, chkp_iters, prev, 0);

chkp2_iters = iters * chkp2_mul[1] / 100 - chkp_iters;
iters -= chkp_iters;
chkp_iters = chkp2_iters;
}
chkp_iters = chkp_iters / vdf->interval * vdf->interval;
prev = hw_queue_proof(vdf, chkp_iters, prev, 0);
iters -= chkp_iters;
}
hw_queue_proof(vdf, iters, prev, HW_VDF_PROOF_FLAG_IS_REQ);
hw_proof_queue_proofs(vdf, iters, prev);

{
ProofCmp cmp(vdf->proofs);
Expand Down Expand Up @@ -431,7 +445,7 @@ void hw_proof_process_work(struct vdf_state *vdf)
vdf->idx, i, iters, proof->seg_iters, is_chkp ? " [checkpoint]" : "");
vdf->queued_proofs.erase(vdf->queued_proofs.begin());
vdf->aux_threads_busy |= 1UL << i;
vdf->n_proof_threads += PARALLEL_PROVER_N_THREADS;
vdf->n_proof_threads += vdf->segment_threads;
proof->flags |= HW_VDF_PROOF_FLAG_STARTED;
std::thread(hw_compute_proof, vdf, idx, proof, i).detach();
}
Expand Down Expand Up @@ -569,7 +583,7 @@ void hw_stop_proof(struct vdf_state *vdf)
class HwProver : public ParallelProver {
public:
HwProver(Segment segm, integer D, struct vdf_state *vdf)
: ParallelProver(segm, D)
: ParallelProver(segm, D, vdf->segment_threads)
{
this->vdf = vdf;
k = FindK(segm.length);
Expand Down Expand Up @@ -677,7 +691,7 @@ void hw_compute_proof(struct vdf_state *vdf, size_t proof_idx, struct vdf_proof
Segment seg(start_iters, proof_iters - start_iters, x, y);
HwProver prover(seg, vdf->D, vdf);

if (!is_chkp && seg.length > g_chkp_thres) {
if (!is_chkp && seg.length > g_chkp_thres * vdf->segment_threads) {
LOG_INFO("VDF %d: Warning: too long final proof segment length=%lu",
vdf->idx, seg.length);
}
Expand Down Expand Up @@ -727,7 +741,7 @@ void hw_compute_proof(struct vdf_state *vdf, size_t proof_idx, struct vdf_proof
out:
if (thr_idx < vdf->max_aux_threads) {
vdf->aux_threads_busy &= ~(1UL << thr_idx);
vdf->n_proof_threads -= PARALLEL_PROVER_N_THREADS;
vdf->n_proof_threads -= vdf->segment_threads;
}
}

Expand Down Expand Up @@ -806,6 +820,10 @@ void init_vdf_state(struct vdf_state *vdf, struct vdf_proof_opts *opts, const ch
if (opts && opts->max_proof_threads) {
vdf->max_proof_threads = opts->max_proof_threads;
}
vdf->segment_threads = 2;
if (opts && opts->segment_threads) {
vdf->segment_threads = opts->segment_threads;
}

mpz_set_str(vdf->D.impl, d_str, 0);
mpz_set(vdf->L.impl, vdf->D.impl);
Expand Down
2 changes: 2 additions & 0 deletions src/hw/hw_proof.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct vdf_proof {
// Proof-related thread settings passed to init_vdf_state().
struct vdf_proof_opts {
// Upper bound on auxiliary threads; the client initialises it to
// HW_VDF_DEFAULT_MAX_AUX_THREADS (presumably overridden by --vdf-threads; confirm).
uint8_t max_aux_threads;
// Value of --proof-threads; 0 means "not specified" and leaves the
// default in vdf_state untouched.
uint8_t max_proof_threads;
// Value of --segment-threads (proof threads per segment); 0 means
// "not specified", in which case init_vdf_state() uses 2.
uint8_t segment_threads;
};

struct vdf_state {
Expand Down Expand Up @@ -85,6 +86,7 @@ struct vdf_state {
uint8_t idx;
uint8_t max_aux_threads;
uint8_t max_proof_threads;
uint8_t segment_threads;
bool completed;
bool stopping;
bool init_done;
Expand Down
15 changes: 12 additions & 3 deletions src/hw/hw_vdf_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,7 @@ int parse_opts(int argc, char **argv, struct vdf_client_opts *opts)
{"vdfs-mask", required_argument, NULL, 1},
{"vdf-threads", required_argument, NULL, 1},
{"proof-threads", required_argument, NULL, 1},
{"segment-threads", required_argument, NULL, 1},
{"list", no_argument, NULL, 1},
{"auto-freq-period", required_argument, NULL, 1},
{0}
Expand All @@ -435,6 +436,7 @@ int parse_opts(int argc, char **argv, struct vdf_client_opts *opts)
opts->auto_freq = false;
opts->vpo.max_aux_threads = HW_VDF_DEFAULT_MAX_AUX_THREADS;
opts->vpo.max_proof_threads = 0;
opts->vpo.segment_threads = 0;
opts->vdfs_mask = 0;

while ((ret = getopt_long(argc, argv, "", long_opts, &long_idx)) == 1) {
Expand All @@ -451,8 +453,10 @@ int parse_opts(int argc, char **argv, struct vdf_client_opts *opts)
} else if (long_idx == 5) {
opts->vpo.max_proof_threads = strtoul(optarg, NULL, 0);
} else if (long_idx == 6) {
opts->do_list = true;
opts->vpo.segment_threads = strtoul(optarg, NULL, 0);
} else if (long_idx == 7) {
opts->do_list = true;
} else if (long_idx == 8) {
opts->auto_freq = true;
opts->auto_freq_period = strtoul(optarg, NULL, 0);
}
Expand Down Expand Up @@ -493,6 +497,10 @@ int parse_opts(int argc, char **argv, struct vdf_client_opts *opts)
LOG_SIMPLE("Number of proof threads must be less than VDF threads");
return -1;
}
if (opts->vpo.segment_threads > 8) {
LOG_SIMPLE("Number of proof threads per segment must be between 1 and 8");
return -1;
}
if (opts->auto_freq && opts->auto_freq_period < 10) {
LOG_SIMPLE("Invalid auto freq period");
return -1;
Expand Down Expand Up @@ -525,8 +533,9 @@ int hw_vdf_client_main(int argc, char **argv)
" --voltage N - set board voltage [%.2f, 0.7 - 1.0]\n"
" --ip A.B.C.D - timelord IP address [localhost]\n"
" --vdfs-mask N - mask for enabling VDF engines [7, 1 - 7]\n"
" --vdf-threads N - number of software threads per VDF engine [4, 2 - 64]\n"
" --proof-threads N - number of proof threads per VDF engine\n"
" --vdf-threads N - max number of software threads per VDF engine [4, 2 - 64]\n"
" --proof-threads N - max number of proof threads per VDF engine\n"
" --segment-threads N - number of proof threads per segment [2, 1 - 8]\n"
" --auto-freq-period N - auto-adjust frequency every N seconds [0, 10 - inf]\n"
" --list - list available devices and exit",
argv[0], (int)HW_VDF_DEF_FREQ, HW_VDF_DEF_VOLTAGE);
Expand Down
32 changes: 23 additions & 9 deletions src/prover_parallel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "proof_common.h"
#include "util.h"

#define PARALLEL_PROVER_N_THREADS 2
#define PROVER_MAX_SEGMENT_THREADS 8

class ParallelProver : public Prover {
private:
Expand Down Expand Up @@ -76,33 +76,47 @@ class ParallelProver : public Prover {
x_vals[thr_idx] = x;
}
public:
ParallelProver(Segment segm, integer D) : Prover(segm, D) {}
ParallelProver(Segment segm, integer D, size_t n_thr) : Prover(segm, D) {
this->n_threads = n_thr;
}
void GenerateProof();

protected:
integer B;
integer L;
form id;
form x_vals[PARALLEL_PROVER_N_THREADS];
form x_vals[PROVER_MAX_SEGMENT_THREADS];
size_t n_threads;
};

// Computes the proof by running the per-part work on several threads and then
// folding the per-thread values in x_vals into `proof` with nucomp_form().
// NOTE(review): this text appears to contain both sides of a diff hunk: the
// fixed two-way split (l0 / l1 / proof_thr, a single nucomp_form of x_vals[0]
// and x_vals[1]) and the n_threads loops. Confirm which statements belong to
// the current revision before relying on the comments below.
void ParallelProver::GenerateProof() {
PulmarkReducer reducer;
// Even share of l per thread, plus the leftover that is handed out one unit
// at a time to the first `rem` threads.
uint32_t len = l / n_threads;
uint32_t rem = l % n_threads;
// Running start position for the next thread; counts down from l.
uint32_t start = l;
// Fixed-size array: n_threads must not exceed PROVER_MAX_SEGMENT_THREADS.
std::thread threads[PROVER_MAX_SEGMENT_THREADS];

this->B = GetB(D, segm.x, segm.y);
this->L = root(-D, 4);
this->id = form::identity(D);

// Two-way split: part 0 runs on a helper thread, part 1 on the calling thread.
uint32_t l0 = l / 2;
uint32_t l1 = l - l0;
std::thread proof_thr(ParallelProver::ProofThread, this, 0, l, l0);
ProvePart(1, l1, l1);
// N-way split: thread i gets `len` units (+1 while i < rem), starting at
// `start`, which then moves down by the amount just assigned.
for (size_t i = 0; i < n_threads; i++) {
uint32_t cur_len = len + (i < rem);
threads[i] = std::thread(ParallelProver::ProofThread, this, i, start, cur_len);
start -= cur_len;
}

// Wait for every worker before reading x_vals.
proof_thr.join();
for (size_t i = 0; i < n_threads; i++) {
threads[i].join();
}
// Bail out without finalising the proof if the extra step reports failure.
if (!PerformExtraStep()) {
return;
}
nucomp_form(proof, x_vals[0], x_vals[1], D, L);

// Fold the per-thread values into a single form, left to right.
proof = x_vals[0];
for (size_t i = 1; i < n_threads; i++) {
nucomp_form(proof, proof, x_vals[i], D, L);
}
reducer.reduce(proof);
OnFinish();
}
Expand Down
9 changes: 6 additions & 3 deletions src/vdf_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,17 +295,20 @@ class Prover {
std::atomic<bool> is_finished;
};

#define PARALLEL_PROVER_N_THREADS 2
#define PROVER_MAX_SEGMENT_THREADS 8

// Prover subclass that declares GenerateProof() and holds per-thread state
// (x_vals, n_threads); the definition of GenerateProof() is not in this excerpt.
// NOTE(review): this text appears to contain both sides of a diff hunk (two
// constructors and two x_vals declarations); confirm which members belong to
// the current revision.
class ParallelProver : public Prover {
public:
ParallelProver(Segment segm, integer D) : Prover(segm, D) {}
// Records the number of threads to use. n_thr is not range-checked here, so
// callers must keep it within the size of x_vals.
ParallelProver(Segment segm, integer D, size_t n_thr) : Prover(segm, D) {
this->n_threads = n_thr;
}
void GenerateProof();
protected:
integer B;
integer L;
form id;
// One slot per thread.
form x_vals[PARALLEL_PROVER_N_THREADS];
form x_vals[PROVER_MAX_SEGMENT_THREADS];
// Number of threads in use; set by the three-argument constructor.
size_t n_threads;
};

void nudupl_form(form &a, form &b, integer &D, integer &L);
Expand Down