
Commit

Merge branch 'master' into coverage-ggerganov#295
goerch authored Jul 18, 2023
2 parents 702c8ab + 35171f5 commit a9c9c03
Showing 4 changed files with 45 additions and 16 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -4,8 +4,12 @@ build-release/
build-sanitize-addr/
build-sanitize-thread/
build-cov/
build-ci-debug/
build-ci-release/
out/
tmp/
models/
models-mnt

compile_commands.json
CMakeSettings.json
23 changes: 18 additions & 5 deletions ci/run.sh
@@ -1,11 +1,23 @@
#/bin/bash

if [ -z "$2" ]; then
echo "usage: $0 <output-dir> <mnt-dir>"
exit 1
fi

mkdir -p "$1"
mkdir -p "$2"

OUT=$(realpath "$1")
MNT=$(realpath "$2")

rm -v $OUT/*.log
rm -v $OUT/*.exit
rm -v $OUT/*.md

sd=`dirname $0`
cd $sd/../

SRC=`pwd`
OUT="$1"
MNT="$2"

## helpers

@@ -183,8 +195,9 @@ function gg_sum_mpt
if [ -z $GG_BUILD_LOW_PERF ]; then
    rm -rf ${SRC}/models-mnt

    mkdir -p $(realpath ${MNT}/models)
    ln -sfn ${MNT}/models ${SRC}/models-mnt
    mnt_models=$(realpath ${MNT}/models)
    mkdir -p ${mnt_models}
    ln -sfn ${mnt_models} ${SRC}/models-mnt

    python3 -m pip install -r ${SRC}/requirements.txt
fi
19 changes: 18 additions & 1 deletion src/ggml-cuda.cu
@@ -3537,6 +3537,11 @@ void ggml_cuda_cpy(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst)
    (void) dst;
}

void ggml_cuda_dup(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    ggml_cuda_cpy(src0, dst, nullptr);
    (void) src1;
}

void ggml_cuda_diag_mask_inf(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
    GGML_ASSERT(src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32);
    ggml_cuda_op(src0, src1, dst, ggml_cuda_op_diag_mask_inf, true, true);
@@ -3670,7 +3675,7 @@ void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bool force_inplace)
    // recursively assign CUDA buffers until a compute tensor is found
    if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_CPU) {
        const ggml_op src0_op = tensor->src[0]->op;
        if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW) {
        if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW || src0_op == GGML_OP_PERMUTE) {
            ggml_cuda_assign_buffers_impl(tensor->src[0], scratch, force_inplace);
        }
    }
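
Note (not part of the diff): GGML_OP_PERMUTE is added to this check because, like RESHAPE, TRANSPOSE and VIEW, it only produces a view over its source tensor's data, so the buffer-assignment recursion can safely descend through it. A minimal sketch of that idea, using the op names from the hunk above; the helper name is illustrative, not from the commit:

    // Sketch only: ops that merely create a view of their source (no new data),
    // which ggml_cuda_assign_buffers_impl can recurse through. The commit adds
    // GGML_OP_PERMUTE to this set.
    #include "ggml.h"
    #include <stdbool.h>

    static bool is_view_like_op(enum ggml_op op) {
        switch (op) {
            case GGML_OP_RESHAPE:
            case GGML_OP_TRANSPOSE:
            case GGML_OP_VIEW:
            case GGML_OP_PERMUTE:
                return true;
            default:
                return false;
        }
    }
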
@@ -3776,6 +3781,12 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor)
            || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);

    switch (tensor->op) {
        case GGML_OP_DUP:
            if (!any_on_device) {
                return false;
            }
            func = ggml_cuda_dup;
            break;
        case GGML_OP_ADD:
            if (!any_on_device) {
                return false;
@@ -3830,6 +3841,12 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor)
            }
            func = ggml_cuda_cpy;
            break;
        case GGML_OP_CONT:
            if (!any_on_device) {
                return false;
            }
            func = ggml_cuda_dup;
            break;
        case GGML_OP_RESHAPE:
        case GGML_OP_VIEW:
        case GGML_OP_PERMUTE:
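
Note (not part of the diff): the new GGML_OP_DUP and GGML_OP_CONT cases both set func = ggml_cuda_dup, and ggml_cuda_dup simply forwards to ggml_cuda_cpy, so DUP, CONT and CPY all reach the same device-side copy routine. A condensed sketch of that dispatch; the helper name and reduced signature are illustrative, and it assumes the ggml-cuda declarations above are in scope:

    // Sketch only: how the added cases relate to the existing copy path.
    #include "ggml.h"

    typedef void (*cuda_op_fn)(const struct ggml_tensor * src0,
                               const struct ggml_tensor * src1,
                               struct ggml_tensor * dst);

    static cuda_op_fn pick_cuda_func(enum ggml_op op) {
        switch (op) {
            case GGML_OP_DUP:
            case GGML_OP_CONT: return ggml_cuda_dup; // both are plain copies on the GPU
            case GGML_OP_CPY:  return ggml_cuda_cpy;
            default:           return NULL;          // other ops are dispatched elsewhere
        }
    }
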
15 changes: 5 additions & 10 deletions src/ggml.c
@@ -4420,8 +4420,8 @@ void ggml_free(struct ggml_context * ctx) {
    if (&g_state.contexts[i].context == ctx) {
        g_state.contexts[i].used = false;

        GGML_PRINT_DEBUG("%s: context %d with %d objects has been freed. memory used = %zu\n",
                __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
        GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
                __func__, i, ggml_used_mem(ctx));

        if (ctx->mem_buffer_owned) {
            GGML_ALIGNED_FREE(ctx->mem_buffer);
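
Note (not part of the diff): the new debug message calls ggml_used_mem() instead of reading the object list directly. A rough sketch of what that helper is expected to return, inferred from the expression it replaces; the NULL guard and the helper name are assumptions, and the real implementation lives in ggml.c:

    // Sketch: end offset of the last object allocated in the context's buffer,
    // or 0 if nothing has been allocated yet (assumed guard).
    #include <stddef.h>

    static size_t used_mem_sketch(const struct ggml_context * ctx) {
        return ctx->objects_end == NULL
            ? 0
            : ctx->objects_end->offs + ctx->objects_end->size;
    }
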
@@ -16362,8 +16362,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
        if (GGML_OP_HAS_FINALIZE[node->op]) {
            params.nth = n_tasks_arr[node_n];
            ggml_compute_forward(&params, node);
            ggml_graph_compute_perf_stats_node(node, state->shared);
        }
        ggml_graph_compute_perf_stats_node(node, state->shared);
    }

    // distribute new work or execute it direct if 1T
@@ -16393,8 +16393,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
        if (GGML_OP_HAS_FINALIZE[node->op]) {
            params.type = GGML_TASK_FINALIZE;
            ggml_compute_forward(&params, node);
            ggml_graph_compute_perf_stats_node(node, state->shared);
        }

        ggml_graph_compute_perf_stats_node(node, state->shared);
    } else {
        break;
    }
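
Note (not part of the diff): both hunks in ggml_graph_compute_thread move the ggml_graph_compute_perf_stats_node() call out of the GGML_OP_HAS_FINALIZE branch, so per-node timing is recorded for every node rather than only for ops that have a finalize pass. A simplified sketch of the resulting control flow; the helper name and the shared-state type name are illustrative, not from the commit:

    // Sketch: the finalize pass stays conditional, the stats update always runs.
    static void finish_node(struct ggml_compute_params * params,
                            struct ggml_tensor * node,
                            struct ggml_compute_state_shared * shared) {
        if (GGML_OP_HAS_FINALIZE[node->op]) {
            params->type = GGML_TASK_FINALIZE;
            ggml_compute_forward(params, node);
        }
        ggml_graph_compute_perf_stats_node(node, shared); // previously only inside the if
    }
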
@@ -16936,9 +16937,6 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
}

void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
    //assert(cgraph->work == NULL);
    //assert(cgraph->work_size == 0);

    uint64_t size_eval = 0;

    // compute size of intermediate results
@@ -17377,9 +17375,6 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {

GGML_PRINT("=== GRAPH ===\n");

// GGML_PRINT_DEBUG("n_threads = %d\n", cgraph->n_threads);
// GGML_PRINT_DEBUG("total work size = %zu bytes\n", cgraph->work_size);

GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes);
for (int i = 0; i < cgraph->n_nodes; i++) {
struct ggml_tensor * node = cgraph->nodes[i];
