Skip to content

Commit

Permalink
CORE: fix bug for triggered colls in tl/ucp (openucx#757)
Browse files (browse the repository at this point in the history)
  • Loading branch information
samnordmann authored and Kaidrikov Evgeny committed Jun 21, 2023
1 parent 4ec3bc9 commit f8516a9
Show file tree
Hide file tree
Showing 20 changed files with 18 additions and 38 deletions.
4 changes: 1 addition & 3 deletions src/components/cl/hier/allreduce/allreduce_rab.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -160,7 +160,6 @@ ucc_cl_hier_allreduce_rab_init_schedule(ucc_base_coll_args_t *coll_args,
schedule->super.post = ucc_cl_hier_allreduce_rab_start;
schedule->super.progress = NULL;
schedule->super.finalize = ucc_cl_hier_allreduce_rab_finalize;
schedule->super.triggered_post = ucc_triggered_post;
*sched_p = schedule;
return UCC_OK;

Expand Down Expand Up @@ -243,7 +242,6 @@ UCC_CL_HIER_PROFILE_FUNC(ucc_status_t, ucc_cl_hier_allreduce_rab_init,
}

schedule->super.super.super.post = ucc_cl_hier_rab_allreduce_start;
schedule->super.super.super.triggered_post = ucc_triggered_post;
schedule->super.super.super.finalize = ucc_cl_hier_ar_rab_schedule_finalize;
*task = &schedule->super.super.super;
return UCC_OK;
Expand Down
1 change: 0 additions & 1 deletion src/components/cl/hier/allreduce/allreduce_split_rail.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,6 @@ UCC_CL_HIER_PROFILE_FUNC(ucc_status_t, ucc_cl_hier_allreduce_split_rail_init,
}

schedule->super.super.super.post = ucc_cl_hier_split_rail_allreduce_start;
schedule->super.super.super.triggered_post = ucc_triggered_post;
schedule->super.super.super.finalize =
ucc_cl_hier_ar_split_rail_schedule_finalize;
*task = &schedule->super.super.super;
Expand Down
3 changes: 1 addition & 2 deletions src/components/cl/hier/alltoallv/alltoallv.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
*
* See file LICENSE for terms.
Expand Down Expand Up @@ -260,7 +260,6 @@ UCC_CL_HIER_PROFILE_FUNC(ucc_status_t, ucc_cl_hier_alltoallv_init,
schedule->super.post = ucc_cl_hier_alltoallv_start;
schedule->super.progress = NULL;
schedule->super.finalize = ucc_cl_hier_alltoallv_finalize;
schedule->super.triggered_post = ucc_triggered_post;
schedule->super.triggered_post_setup =
ucc_cl_hier_alltoallv_triggered_post_setup;
*task = &schedule->super;
Expand Down
4 changes: 1 addition & 3 deletions src/components/cl/hier/bcast/bcast_2step.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -172,7 +172,6 @@ ucc_cl_hier_bcast_2step_init_schedule(ucc_base_coll_args_t *coll_args,
schedule->super.post = ucc_cl_hier_bcast_2step_start;
schedule->super.progress = NULL;
schedule->super.finalize = ucc_cl_hier_bcast_2step_finalize;
schedule->super.triggered_post = ucc_triggered_post;
*sched_p = schedule;
return UCC_OK;

Expand Down Expand Up @@ -251,7 +250,6 @@ UCC_CL_HIER_PROFILE_FUNC(ucc_status_t, ucc_cl_hier_bcast_2step_init,
}

schedule->super.super.super.post = ucc_cl_hier_2step_bcast_start;
schedule->super.super.super.triggered_post = ucc_triggered_post;
schedule->super.super.super.finalize =
ucc_cl_hier_bcast_2step_schedule_finalize;
*task = &schedule->super.super.super;
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/cuda/allgather/allgather_linear.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -32,7 +32,6 @@ ucc_status_t ucc_tl_cuda_allgather_linear_init(ucc_base_coll_args_t *coll_args,

task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR;
task->super.post = ucc_tl_cuda_allgatherv_linear_start;
task->super.triggered_post = ucc_triggered_post;
task->super.progress = ucc_tl_cuda_allgatherv_linear_progress;
task->super.finalize = ucc_tl_cuda_allgatherv_linear_finalize;
task->bar = TASK_BAR(task);
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/cuda/allgather/allgather_ring.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -29,7 +29,6 @@ ucc_status_t ucc_tl_cuda_allgather_ring_init(ucc_base_coll_args_t *coll_args,

task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR;
task->super.post = ucc_tl_cuda_allgatherv_ring_start;
task->super.triggered_post = ucc_triggered_post;
task->super.progress = ucc_tl_cuda_allgatherv_ring_progress;
task->super.finalize = ucc_tl_cuda_allgatherv_ring_finalize;
task->bar = TASK_BAR(task);
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/cuda/allgatherv/allgatherv_linear.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -443,7 +443,6 @@ ucc_status_t ucc_tl_cuda_allgatherv_linear_init(ucc_base_coll_args_t *coll_args,
task->allgatherv_linear.dt = coll_args->args.dst.info_v.datatype;
task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR;
task->super.post = ucc_tl_cuda_allgatherv_linear_start;
task->super.triggered_post = ucc_triggered_post;
task->super.progress = ucc_tl_cuda_allgatherv_linear_progress;
task->super.finalize = ucc_tl_cuda_allgatherv_linear_finalize;
task->bar = TASK_BAR(task);
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/cuda/allgatherv/allgatherv_ring.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -431,7 +431,6 @@ ucc_status_t ucc_tl_cuda_allgatherv_ring_init(ucc_base_coll_args_t *coll_args,

task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR;
task->super.post = ucc_tl_cuda_allgatherv_ring_start;
task->super.triggered_post = ucc_triggered_post;
task->super.progress = ucc_tl_cuda_allgatherv_ring_progress;
task->super.finalize = ucc_tl_cuda_allgatherv_ring_finalize;
task->bar = TASK_BAR(task);
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/cuda/alltoall/alltoall_ce.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
*
* See file LICENSE for terms.
Expand Down Expand Up @@ -67,7 +67,6 @@ ucc_status_t ucc_tl_cuda_alltoall_ce_init(ucc_tl_cuda_task_t *task)
}

task->super.post = ucc_tl_cuda_alltoallv_ce_start;
task->super.triggered_post = ucc_triggered_post;
task->super.triggered_post_setup =
ucc_tl_cuda_alltoallv_ce_triggered_post_setup;
task->super.progress = ucc_tl_cuda_alltoallv_ce_progress;
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/cuda/alltoallv/alltoallv_ce.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
*
* See file LICENSE for terms.
Expand Down Expand Up @@ -455,7 +455,6 @@ ucc_status_t ucc_tl_cuda_alltoallv_ce_init(ucc_tl_cuda_task_t *task)

task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR;
task->super.post = ucc_tl_cuda_alltoallv_ce_start;
task->super.triggered_post = ucc_triggered_post;
task->super.triggered_post_setup =
ucc_tl_cuda_alltoallv_ce_triggered_post_setup;
task->super.progress = ucc_tl_cuda_alltoallv_ce_progress;
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/cuda/reduce_scatter/reduce_scatter_linear.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -36,7 +36,6 @@ ucc_status_t ucc_tl_cuda_reduce_scatter_linear_init(ucc_base_coll_args_t *coll_a
task->reduce_scatterv_linear.rbuf = coll_args->args.dst.info.buffer;
task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR;
task->super.post = ucc_tl_cuda_reduce_scatterv_linear_start;
task->super.triggered_post = ucc_triggered_post;
task->super.progress = ucc_tl_cuda_reduce_scatterv_linear_progress;
task->super.finalize = ucc_tl_cuda_reduce_scatterv_linear_finalize;
task->bar = TASK_BAR(task);
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/cuda/reduce_scatter/reduce_scatter_ring.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -39,7 +39,6 @@ ucc_status_t ucc_tl_cuda_reduce_scatter_ring_init(ucc_base_coll_args_t *coll_arg
task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR;
task->reduce_scatterv_ring.num_frags = ucc_div_round_up(send_size, frag_size);
task->super.post = ucc_tl_cuda_reduce_scatterv_ring_start;
task->super.triggered_post = ucc_triggered_post;
task->super.progress = ucc_tl_cuda_reduce_scatterv_ring_progress;
task->super.finalize = ucc_tl_cuda_reduce_scatterv_ring_finalize;
task->bar = TASK_BAR(task);
Expand Down
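3 changes: 1 addition & 2 deletions src/components/tl/cuda/reduce_scatterv/reduce_scatterv_linear.c
Original file line number Diff line number Diff line change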
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -452,7 +452,6 @@ ucc_tl_cuda_reduce_scatterv_linear_init(ucc_base_coll_args_t *coll_args,
task->super.post = ucc_tl_cuda_reduce_scatterv_linear_start;
task->super.progress = ucc_tl_cuda_reduce_scatterv_linear_progress;
task->super.finalize = ucc_tl_cuda_reduce_scatterv_linear_finalize;
task->super.triggered_post = ucc_triggered_post;
task->bar = TASK_BAR(task);

*task_p = &task->super;
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/cuda/reduce_scatterv/reduce_scatterv_ring.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -348,7 +348,6 @@ ucc_tl_cuda_reduce_scatterv_ring_init(ucc_base_coll_args_t *coll_args,

task->super.flags |= UCC_COLL_TASK_FLAG_EXECUTOR;
task->super.post = ucc_tl_cuda_reduce_scatterv_ring_start;
task->super.triggered_post = ucc_triggered_post;
task->super.progress = ucc_tl_cuda_reduce_scatterv_ring_progress;
task->super.finalize = ucc_tl_cuda_reduce_scatterv_ring_finalize;
task->reduce_scatterv_ring.get_count = ucc_tl_cuda_reduce_scatterv_get_count;
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/self/tl_self_coll.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) Meta Platforms, Inc. and affiliates. 2022.
*
* See file LICENSE for terms.
Expand All @@ -24,7 +24,6 @@ ucc_tl_self_coll_init_task(ucc_base_coll_args_t *coll_args,
ucc_coll_task_init(&task->super, coll_args, team);
UCC_TL_SELF_PROFILE_REQUEST_NEW(task, "tl_self_task", 0);
task->super.finalize = ucc_tl_self_coll_finalize;
task->super.triggered_post = ucc_triggered_post;
task->src = NULL;
task->dst = NULL;
task->size = 0;
Expand Down
1 change: 0 additions & 1 deletion src/components/tl/sharp/tl_sharp_team.c
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ ucc_status_t ucc_tl_sharp_coll_init(ucc_base_coll_args_t *coll_args,

task->req_handle = NULL;
task->super.finalize = ucc_tl_sharp_coll_finalize;
task->super.triggered_post = ucc_triggered_post;

switch (coll_args->args.coll_type)
{
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/ucp/allreduce/allreduce_sra_knomial.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -214,7 +214,6 @@ ucc_tl_ucp_allreduce_sra_knomial_init(ucc_base_coll_args_t *coll_args,
}
schedule_p->super.super.finalize =
ucc_tl_ucp_allreduce_sra_knomial_finalize;
schedule_p->super.super.triggered_post = ucc_triggered_post;
schedule_p->super.super.post = ucc_tl_ucp_allreduce_sra_knomial_start;
*task_h = &schedule_p->super.super;
return UCC_OK;
Expand Down
3 changes: 1 addition & 2 deletions src/components/tl/ucp/bcast/bcast_sag_knomial.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -125,7 +125,6 @@ ucc_tl_ucp_bcast_sag_knomial_init(ucc_base_coll_args_t *coll_args,
schedule->super.post = ucc_tl_ucp_bcast_sag_knomial_start;
schedule->super.progress = NULL;
schedule->super.finalize = ucc_tl_ucp_bcast_sag_knomial_finalize;
schedule->super.triggered_post = ucc_triggered_post;
*task_h = &schedule->super;
return UCC_OK;
out:
Expand Down
1 change: 0 additions & 1 deletion src/components/tl/ucp/tl_ucp_coll.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,6 @@ ucc_tl_ucp_init_task(ucc_base_coll_args_t *coll_args, ucc_base_team_t *team)
}

task->super.finalize = ucc_tl_ucp_coll_finalize;
task->super.triggered_post = ucc_triggered_post;
return task;
}

Expand Down
3 changes: 2 additions & 1 deletion src/schedule/ucc_schedule.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* See file LICENSE for terms.
*/
#include "ucc_schedule.h"
Expand Down Expand Up @@ -96,6 +96,7 @@ ucc_status_t ucc_coll_task_init(ucc_coll_task_t *task,
task->executor = NULL;
task->super.status = UCC_OPERATION_INITIALIZED;
task->triggered_post_setup = NULL;
task->triggered_post = ucc_triggered_post;
if (bargs) {
memcpy(&task->bargs, bargs, sizeof(*bargs));
}
Expand Down

0 comments on commit f8516a9

Please sign in to comment.