forked from openucx/ucc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathucc.conf
91 lines (73 loc) · 3.22 KB
/
ucc.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Default TLS configuration.
# The defaults below mostly use the "negate" form (a leading ^ means
# "all TLs except the ones listed"), so that the default TL config
# never throws warnings if some TLs are not available.
# Currently compiled tls: ucp,cuda,nccl,sharp
# Default for CL_BASIC: all except sharp,nccl.
# cuda will silently disqualify itself for multinode teams
# but will be used on a single node.
UCC_CL_BASIC_TLS=^sharp,nccl
# Defaults for CL_HIER: set per SBGP.
# Sharp should be explicitly enabled, so it is excluded here (as is nccl).
UCC_CL_HIER_NODE_SBGP_TLS=^sharp,nccl
# cuda is additionally disabled for NODE_LEADERS and NET.
UCC_CL_HIER_NODE_LEADERS_SBGP_TLS=^sharp,nccl,cuda
UCC_CL_HIER_NET_SBGP_TLS=^sharp,nccl,cuda
# FULL_SBGP is currently only used for hierarchical alltoall
# with ucp sbgp on top, so ucp is named explicitly instead of negated.
UCC_CL_HIER_FULL_SBGP_TLS=ucp
# Tuning sections (currently only TL/UCP is supported)
# Intel Broadwell:
# 28-member team on a single node (ppn=28):
# allreduce selection switches from @0 to @1 at 4k.
[vendor=intel model=broadwell team_size=28 ppn=28 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=7
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1
# 2-member team, one per node across 2 nodes:
# allreduce selection switches from @0 to @1 at 128k.
[vendor=intel model=broadwell team_size=2 ppn=1 nnodes=2]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=2
UCC_TL_UCP_TUNE=allreduce:0-128k:@0#allreduce:128k-inf:@1
# 4-member team, one per node across 4 nodes:
# allreduce selection switches from @0 to @1 at 16k.
[vendor=intel model=broadwell team_size=4 ppn=1 nnodes=4]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=4
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4
UCC_TL_UCP_TUNE=allreduce:0-16k:@0#allreduce:16k-inf:@1
# 8-member team, one per node across 8 nodes:
# allreduce selection switches from @0 to @1 at 4k.
[vendor=intel model=broadwell team_size=8 ppn=1 nnodes=8]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=8
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=8
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1
# Intel Skylake:
# 40-member team on a single node (ppn=40):
# allreduce selection switches from @0 to @1 at 2k.
# SRA_KN_RADIX is ranged: 2 for 0-128k, 8 for 128k-inf
# ("host" presumably restricts the entry to host memory - TODO confirm).
[vendor=intel model=skylake team_size=40 ppn=40 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=0-128k:host:2,128k-inf:host:8
UCC_TL_UCP_TUNE=allreduce:0-2k:@0#allreduce:2k-inf:@1
# 2-member team, one per node across 2 nodes:
# allreduce selection switches from @0 to @1 at 16k.
[vendor=intel model=skylake team_size=2 ppn=1 nnodes=2]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=2
UCC_TL_UCP_TUNE=allreduce:0-16k:@0#allreduce:16k-inf:@1
# 4-member team, one per node across 4 nodes:
# allreduce selection switches from @0 to @1 at 8k.
[vendor=intel model=skylake team_size=4 ppn=1 nnodes=4]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=4
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4
UCC_TL_UCP_TUNE=allreduce:0-8k:@0#allreduce:8k-inf:@1
# 8-member team, one per node across 8 nodes:
# allreduce selection switches from @0 to @1 at 4k.
# KN_RADIX is ranged: 8 for 0-8k, 2 for 8k-inf
# ("host" presumably restricts the entry to host memory - TODO confirm).
[vendor=intel model=skylake team_size=8 ppn=1 nnodes=8]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-8k:host:8,8k-inf:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=8
UCC_TL_UCP_TUNE=allreduce:0-4k:@0#allreduce:4k-inf:@1
# 32-member team, one per node across 32 nodes:
# allreduce selection switches from @0 to @1 at 2k.
# KN_RADIX is ranged: 8 for 0-8k, 2 for 8k-inf
# ("host" presumably restricts the entry to host memory - TODO confirm).
[vendor=intel model=skylake team_size=32 ppn=1 nnodes=32]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-8k:host:8,8k-inf:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=8
UCC_TL_UCP_TUNE=allreduce:0-2k:@0#allreduce:2k-inf:@1
# AMD Rome:
# 128-member team on a single node (ppn=128):
# allreduce selection switches from @0 to @1 at 1k.
# SRA_KN_RADIX is ranged: 4 for 0-64k, 8 for 64k-inf
# ("host" presumably restricts the entry to host memory - TODO confirm).
[vendor=amd model=rome team_size=128 ppn=128 nnodes=1]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=0-64k:host:4,64k-inf:host:8
UCC_TL_UCP_TUNE=allreduce:0-1k:@0#allreduce:1k-inf:@1
# 2-member team, one per node across 2 nodes:
# allreduce selection switches from @0 to @1 at 256k.
[vendor=amd model=rome team_size=2 ppn=1 nnodes=2]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=2
UCC_TL_UCP_TUNE=allreduce:0-256k:@0#allreduce:256k-inf:@1
# 4-member team, one per node across 4 nodes:
# allreduce selection switches from @0 to @1 at 16k.
[vendor=amd model=rome team_size=4 ppn=1 nnodes=4]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=4
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=4
UCC_TL_UCP_TUNE=allreduce:0-16k:@0#allreduce:16k-inf:@1
# 8-member team, one per node across 8 nodes:
# allreduce selection switches from @0 to @1 at 8k.
# KN_RADIX is ranged: 8 for 0-8k, 2 for 8k-inf
# ("host" presumably restricts the entry to host memory - TODO confirm).
[vendor=amd model=rome team_size=8 ppn=1 nnodes=8]
UCC_TL_UCP_ALLREDUCE_KN_RADIX=0-8k:host:8,8k-inf:host:2
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=8
UCC_TL_UCP_TUNE=allreduce:0-8k:@0#allreduce:8k-inf:@1