extract spin lock to functions to be able to estimate the effect #7

Draft · wants to merge 1 commit into master
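Note on intent (summarizing the title): once each busy-wait loop lives behind its own named, non-inlined function, a sampling profiler can attribute spin time to that symbol instead of folding it into the caller, which is what makes the effect measurable. A minimal, self-contained sketch of that idea, not taken from this PR (the names g_ready, wait_until_ready, and worker are illustrative):

// sketch: extracting a spin-wait loop into a named function for profiling
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int g_ready = 0;

// keep the wait loop behind its own symbol so profiler samples land here
__attribute__((noinline))
static void wait_until_ready(void) {
    while (atomic_load(&g_ready) == 0) {
        // busy-wait, as the ggml_spin_lock_* helpers below do
    }
}

static void * worker(void * arg) {
    (void) arg;
    atomic_store(&g_ready, 1); // signal the spinning thread
    return NULL;
}

int main(void) {
    pthread_t t;
    pthread_create(&t, NULL, worker, NULL);
    wait_until_ready(); // spin time is attributed to this symbol
    pthread_join(t, NULL);
    printf("ready\n");
    return 0;
}

Built with gcc -O2 -pthread, a profiler such as perf would attribute the spinning samples to wait_until_ready; the extracted ggml_spin_lock_* helpers below play the same role inside ggml_graph_compute.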
ggml.c: 97 changes (57 additions, 40 deletions)
@@ -9187,6 +9187,49 @@ struct ggml_compute_state {
    struct ggml_compute_state_shared * shared;
};

+// spin until all workers have checked out (n_ready == 0)
+void ggml_spin_lock_n_ready_ne(struct ggml_compute_state_shared * state_shared) {
+    while (atomic_load(&state_shared->n_ready) != 0) {
+        ggml_lock_lock (&state_shared->spin);
+        ggml_lock_unlock(&state_shared->spin);
+    }
+}
+
+// spin while any worker is still checked in (n_ready > 0)
+void ggml_spin_lock_n_ready(struct ggml_compute_state_shared * state_shared) {
+    while (atomic_load(&state_shared->n_ready) > 0) {
+        ggml_lock_lock (&state_shared->spin);
+        ggml_lock_unlock(&state_shared->spin);
+    }
+}
+
+// spin until the pending work item has been consumed (has_work == false)
+void ggml_spin_lock_has_work(struct ggml_compute_state_shared * state_shared) {
+    while (atomic_load(&state_shared->has_work)) {
+        ggml_lock_lock (&state_shared->spin);
+        ggml_lock_unlock(&state_shared->spin);
+    }
+}
+
+// like ggml_spin_lock_has_work, but bail out early; returns true if stop was requested
+bool ggml_spin_lock_has_work_stop(struct ggml_compute_state_shared * state_shared) {
+    while (atomic_load(&state_shared->has_work)) {
+        if (atomic_load(&state_shared->stop)) {
+            return true;
+        }
+        ggml_lock_lock (&state_shared->spin);
+        ggml_lock_unlock(&state_shared->spin);
+    }
+    return false;
+}
+
+// spin until new work arrives (has_work == true); returns true if stop was requested
+bool ggml_spin_lock_ne_has_work_stop(struct ggml_compute_state_shared * state_shared) {
+    while (!atomic_load(&state_shared->has_work)) {
+        if (atomic_load(&state_shared->stop)) {
+            return true;
+        }
+        ggml_lock_lock (&state_shared->spin);
+        ggml_lock_unlock(&state_shared->spin);
+    }
+    return false;
+}

static thread_ret_t ggml_graph_compute_thread(void * data) {
    struct ggml_compute_state * state = (struct ggml_compute_state *) data;

@@ -9196,24 +9239,16 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
        if (atomic_fetch_add(&state->shared->n_ready, 1) == n_threads - 1) {
            atomic_store(&state->shared->has_work, false);
        } else {
-            while (atomic_load(&state->shared->has_work)) {
-                if (atomic_load(&state->shared->stop)) {
-                    return 0;
-                }
-                ggml_lock_lock (&state->shared->spin);
-                ggml_lock_unlock(&state->shared->spin);
+            if (ggml_spin_lock_has_work_stop(state->shared)) {
+                return 0;
            }
        }

        atomic_fetch_sub(&state->shared->n_ready, 1);

        // wait for work
-        while (!atomic_load(&state->shared->has_work)) {
-            if (atomic_load(&state->shared->stop)) {
-                return 0;
-            }
-            ggml_lock_lock (&state->shared->spin);
-            ggml_lock_unlock(&state->shared->spin);
+        if (ggml_spin_lock_ne_has_work_stop(state->shared)) {
+            return 0;
        }

        // check if we should stop
@@ -9235,6 +9270,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
    return 0;
}




void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
    const int n_threads = cgraph->n_threads;

@@ -9511,10 +9549,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                atomic_store(&state_shared.has_work, false);
            }

-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
-            }
+            ggml_spin_lock_has_work(&state_shared);

            // launch thread pool
            for (int j = 0; j < n_threads - 1; j++) {
@@ -9530,10 +9565,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)

            atomic_fetch_sub(&state_shared.n_ready, 1);

-            while (atomic_load(&state_shared.n_ready) > 0) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
-            }
+            ggml_spin_lock_n_ready(&state_shared);

            atomic_store(&state_shared.has_work, true);
        }
@@ -9547,17 +9579,11 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                atomic_store(&state_shared.has_work, false);
            }

-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
-            }
+            ggml_spin_lock_has_work(&state_shared);

            atomic_fetch_sub(&state_shared.n_ready, 1);

-            while (atomic_load(&state_shared.n_ready) != 0) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
-            }
+            ggml_spin_lock_n_ready_ne(&state_shared);
        }

        // FINALIZE
@@ -9566,10 +9592,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                atomic_store(&state_shared.has_work, false);
            }

-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
-            }
+            ggml_spin_lock_has_work(&state_shared);

            // launch thread pool
            for (int j = 0; j < n_threads - 1; j++) {
@@ -9602,17 +9625,11 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                atomic_store(&state_shared.has_work, false);
            }

-            while (atomic_load(&state_shared.has_work)) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
-            }
+            ggml_spin_lock_has_work(&state_shared);

            atomic_fetch_sub(&state_shared.n_ready, 1);

-            while (atomic_load(&state_shared.n_ready) != 0) {
-                ggml_lock_lock (&state_shared.spin);
-                ggml_lock_unlock(&state_shared.spin);
-            }
+            ggml_spin_lock_n_ready_ne(&state_shared);
        }

        // performance stats (node)
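Beyond profiler attribution, one could also estimate the effect by timing an extracted helper directly. A rough, self-contained sketch of that approach, not part of this PR (now_ns, timed_wait, g_done, and g_wait_ns are all hypothetical names, not ggml API):

// sketch: accumulate wall-clock time spent in a spin-wait helper
#define _POSIX_C_SOURCE 199309L
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static atomic_bool g_done = false;
static _Atomic long long g_wait_ns = 0; // total nanoseconds spent spinning

static long long now_ns(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

// spin until g_done is set, charging the elapsed time to g_wait_ns
static void timed_wait(void) {
    const long long t0 = now_ns();
    while (!atomic_load(&g_done)) {
        // busy-wait
    }
    atomic_fetch_add(&g_wait_ns, now_ns() - t0);
}

static void * worker(void * arg) {
    (void) arg;
    struct timespec work = { .tv_sec = 0, .tv_nsec = 1000000 }; // ~1 ms of "work"
    nanosleep(&work, NULL);
    atomic_store(&g_done, true);
    return NULL;
}

int main(void) {
    pthread_t t;
    pthread_create(&t, NULL, worker, NULL);
    timed_wait();
    pthread_join(t, NULL);
    printf("spun for ~%lld ns\n", atomic_load(&g_wait_ns));
    return 0;
}

The wait counter is atomic so several threads could charge their spin time to it concurrently; the same wrapper pattern would fit around any of the five ggml_spin_lock_* helpers introduced above.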