[1642057062.144953] [n-62-8-1:164377:0] debug.c:1191 UCX DEBUG using signal stack 0xffff8b850000 size 150016 [1642057062.145058] [n-62-8-1:164377:0] init.c:98 UCX DEBUG /dtu/spack/ucx-1.10.0/lib/libucs.so.0 loaded at 0xffff8b880000 [1642057062.145083] [n-62-8-1:164377:0] init.c:99 UCX DEBUG cmd line: /dtu/spack/ucx-1.10.0/bin/ucx_info -dvb # UCT version=1.10.0 revision b54f3b2 # configured with: --prefix=/dtu/spack/ucx-1.10.0 #define UCX_CONFIG_H #define ENABLE_ASSERT 1 #define ENABLE_BUILTIN_MEMCPY 1 #define ENABLE_DEBUG_DATA 0 #define ENABLE_MT 0 #define ENABLE_PARAMS_CHECK 1 #define ENABLE_SYMBOL_OVERRIDE 1 #define HAVE_1_ARG_BFD_SECTION_SIZE 0 #define HAVE_AARCH64_THUNDERX2 1 #define HAVE_ALLOCA 1 #define HAVE_ALLOCA_H 1 #define HAVE_ATTRIBUTE_NOOPTIMIZE 1 #define HAVE_CLEARENV 1 #define HAVE_CPLUS_DEMANGLE 1 #define HAVE_CPU_SET_T 1 #define HAVE_DC_DV 1 #define HAVE_DECL_ASPRINTF 1 #define HAVE_DECL_BASENAME 1 #define HAVE_DECL_BFD_GET_SECTION_FLAGS 1 #define HAVE_DECL_BFD_GET_SECTION_VMA 1 #define HAVE_DECL_BFD_SECTION_FLAGS 0 #define HAVE_DECL_BFD_SECTION_VMA 1 #define HAVE_DECL_CPU_ISSET 1 #define HAVE_DECL_CPU_ZERO 1 #define HAVE_DECL_ETHTOOL_CMD_SPEED 1 #define HAVE_DECL_FMEMOPEN 1 #define HAVE_DECL_F_SETOWN_EX 1 #define HAVE_DECL_IBV_ACCESS_ON_DEMAND 1 #define HAVE_DECL_IBV_ACCESS_RELAXED_ORDERING 0 #define HAVE_DECL_IBV_ADVISE_MR 1 #define HAVE_DECL_IBV_ALLOC_DM 1 #define HAVE_DECL_IBV_ALLOC_TD 1 #define HAVE_DECL_IBV_CMD_MODIFY_QP 0 #define HAVE_DECL_IBV_CREATE_CQ_ATTR_IGNORE_OVERRUN 1 #define HAVE_DECL_IBV_CREATE_QP_EX 1 #define HAVE_DECL_IBV_CREATE_SRQ 1 #define HAVE_DECL_IBV_CREATE_SRQ_EX 1 #define HAVE_DECL_IBV_EVENT_GID_CHANGE 1 #define HAVE_DECL_IBV_EVENT_TYPE_STR 1 #define HAVE_DECL_IBV_EXP_ACCESS_ALLOCATE_MR 0 #define HAVE_DECL_IBV_EXP_ACCESS_ON_DEMAND 0 #define HAVE_DECL_IBV_EXP_ALLOC_DM 0 #define HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE 0 #define HAVE_DECL_IBV_EXP_CQ_IGNORE_OVERRUN 0 #define HAVE_DECL_IBV_EXP_CQ_MODERATION 0 #define HAVE_DECL_IBV_EXP_CREATE_QP 0 #define HAVE_DECL_IBV_EXP_CREATE_SRQ 0 #define HAVE_DECL_IBV_EXP_DCT_OOO_RW_DATA_PLACEMENT 0 #define HAVE_DECL_IBV_EXP_DEVICE_ATTR_PCI_ATOMIC_CAPS 0 #define HAVE_DECL_IBV_EXP_DEVICE_ATTR_RESERVED_2 0 #define HAVE_DECL_IBV_EXP_DEVICE_DC_TRANSPORT 0 #define HAVE_DECL_IBV_EXP_DEVICE_MR_ALLOCATE 0 #define HAVE_DECL_IBV_EXP_MR_FIXED_BUFFER_SIZE 0 #define HAVE_DECL_IBV_EXP_MR_INDIRECT_KLMS 0 #define HAVE_DECL_IBV_EXP_ODP_SUPPORT_IMPLICIT 0 #define HAVE_DECL_IBV_EXP_POST_SEND 0 #define HAVE_DECL_IBV_EXP_PREFETCH_MR 0 #define HAVE_DECL_IBV_EXP_PREFETCH_WRITE_ACCESS 0 #define HAVE_DECL_IBV_EXP_QPT_DC_INI 0 #define HAVE_DECL_IBV_EXP_QP_CREATE_UMR 0 #define HAVE_DECL_IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG 0 #define HAVE_DECL_IBV_EXP_QP_OOO_RW_DATA_PLACEMENT 0 #define HAVE_DECL_IBV_EXP_QUERY_DEVICE 0 #define HAVE_DECL_IBV_EXP_QUERY_GID_ATTR 0 #define HAVE_DECL_IBV_EXP_REG_MR 0 #define HAVE_DECL_IBV_EXP_SEND_EXT_ATOMIC_INLINE 0 #define HAVE_DECL_IBV_EXP_SETENV 0 #define HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP 0 #define HAVE_DECL_IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD 0 #define HAVE_DECL_IBV_EXP_WR_NOP 0 #define HAVE_DECL_IBV_GET_ASYNC_EVENT 1 #define HAVE_DECL_IBV_GET_DEVICE_NAME 1 #define HAVE_DECL_IBV_LINK_LAYER_ETHERNET 1 #define HAVE_DECL_IBV_LINK_LAYER_INFINIBAND 1 #define HAVE_DECL_IBV_ODP_SUPPORT_IMPLICIT 0 #define HAVE_DECL_IBV_QPF_GRH_REQUIRED 1 #define HAVE_DECL_IBV_QUERY_DEVICE_EX 1 #define HAVE_DECL_IBV_QUERY_GID 1 #define HAVE_DECL_IBV_WC_STATUS_STR 1 #define HAVE_DECL_IPPROTO_TCP 1 #define HAVE_DECL_MADV_FREE 0 #define HAVE_DECL_MADV_REMOVE 1 #define HAVE_DECL_MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE 1 #define HAVE_DECL_MLX5DV_CREATE_QP 1 #define HAVE_DECL_MLX5DV_DCTYPE_DCT 1 #define HAVE_DECL_MLX5DV_DEVX_SUBSCRIBE_DEVX_EVENT 0 #define HAVE_DECL_MLX5DV_INIT_OBJ 1 #define HAVE_DECL_MLX5DV_IS_SUPPORTED 1 #define HAVE_DECL_MLX5DV_OBJ_AH 1 #define HAVE_DECL_MLX5DV_QP_CREATE_ALLOW_SCATTER_TO_CQE 1 #define HAVE_DECL_MLX5DV_UAR_ALLOC_TYPE_BF 0 #define HAVE_DECL_MLX5DV_UAR_ALLOC_TYPE_NC 0 #define HAVE_DECL_POSIX_MADV_DONTNEED 1 #define HAVE_DECL_PR_SET_PTRACER 1 #define HAVE_DECL_RDMA_ESTABLISH 0 #define HAVE_DECL_RDMA_INIT_QP_ATTR 0 #define HAVE_DECL_SOL_SOCKET 1 #define HAVE_DECL_SO_KEEPALIVE 1 #define HAVE_DECL_SPEED_UNKNOWN 1 #define HAVE_DECL_STRERROR_R 1 #define HAVE_DECL_SYS_BRK 1 #define HAVE_DECL_SYS_IPC 0 #define HAVE_DECL_SYS_MADVISE 1 #define HAVE_DECL_SYS_MMAP 1 #define HAVE_DECL_SYS_MREMAP 1 #define HAVE_DECL_SYS_MUNMAP 1 #define HAVE_DECL_SYS_SHMAT 1 #define HAVE_DECL_SYS_SHMDT 1 #define HAVE_DECL_TCP_KEEPCNT 1 #define HAVE_DECL_TCP_KEEPIDLE 1 #define HAVE_DECL_TCP_KEEPINTVL 1 #define HAVE_DECL___PPC_GET_TIMEBASE_FREQ 0 #define HAVE_DETAILED_BACKTRACE 1 #define HAVE_DEVX 1 #define HAVE_DLFCN_H 1 #define HAVE_HW_TIMER 1 #define HAVE_IB 1 #define HAVE_IBV_DM 1 #define HAVE_IN6_ADDR_S6_ADDR32 1 #define HAVE_INFINIBAND_MLX5DV_H 1 #define HAVE_INFINIBAND_TM_TYPES_H 1 #define HAVE_INTTYPES_H 1 #define HAVE_IP_IP_DST 1 #define HAVE_LIBGEN_H 1 #define HAVE_LIBRT 1 #define HAVE_LINUX_FUTEX_H 1 #define HAVE_LINUX_IP_H 1 #define HAVE_LINUX_MMAN_H 1 #define HAVE_MALLOC_GET_STATE 1 #define HAVE_MALLOC_H 1 #define HAVE_MALLOC_HOOK 1 #define HAVE_MALLOC_SET_STATE 1 #define HAVE_MALLOC_TRIM 1 #define HAVE_MEMALIGN 1 #define HAVE_MEMORY_H 1 #define HAVE_MLX5_HW 1 #define HAVE_MLX5_HW_UD 1 #define HAVE_MREMAP 1 #define HAVE_NETINET_IP_H 1 #define HAVE_NET_ETHERNET_H 1 #define HAVE_NUMA 1 #define HAVE_NUMAIF_H 1 #define HAVE_NUMA_H 1 #define HAVE_ODP 1 #define HAVE_POSIX_MEMALIGN 1 #define HAVE_PREFETCH 1 #define HAVE_SCHED_GETAFFINITY 1 #define HAVE_SCHED_SETAFFINITY 1 #define HAVE_SIGACTION_SA_RESTORER 1 #define HAVE_SIGEVENT_SIGEV_UN_TID 1 #define HAVE_SIGHANDLER_T 1 #define HAVE_STDINT_H 1 #define HAVE_STDLIB_H 1 #define HAVE_STRERROR_R 1 #define HAVE_STRINGS_H 1 #define HAVE_STRING_H 1 #define HAVE_STRUCT_BITMASK 1 #define HAVE_STRUCT_DL_PHDR_INFO 1 #define HAVE_STRUCT_IBV_TM_CAPS_FLAGS 1 #define HAVE_STRUCT_MLX5DV_CQ_CQ_UAR 1 #define HAVE_SYS_EPOLL_H 1 #define HAVE_SYS_EVENTFD_H 1 #define HAVE_SYS_STAT_H 1 #define HAVE_SYS_TYPES_H 1 #define HAVE_SYS_UIO_H 1 #define HAVE_TL_DC 1 #define HAVE_TL_RC 1 #define HAVE_TL_UD 1 #define HAVE_UCM_PTMALLOC286 1 #define HAVE_UNISTD_H 1 #define HAVE___AARCH64_SYNC_CACHE_RANGE 1 #define HAVE___CLEAR_CACHE 1 #define HAVE___CURBRK 1 #define HAVE___SIGHANDLER_T 1 #define IBV_HW_TM 1 #define LT_OBJDIR ".libs/" #define NVALGRIND 1 #define PACKAGE "ucx" #define PACKAGE_BUGREPORT "" #define PACKAGE_NAME "ucx" #define PACKAGE_STRING "ucx 1.10" #define PACKAGE_TARNAME "ucx" #define PACKAGE_URL "" #define PACKAGE_VERSION "1.10" #define STDC_HEADERS 1 #define STRERROR_R_CHAR_P 1 #define UCM_BISTRO_HOOKS 1 #define UCS_MAX_LOG_LEVEL UCS_LOG_LEVEL_TRACE_POLL #define UCT_TCP_EP_KEEPALIVE 1 #define UCT_UD_EP_DEBUG_HOOKS 0 #define UCX_CONFIGURE_FLAGS "--prefix=/dtu/spack/ucx-1.10.0" #define UCX_MODULE_SUBDIR "ucx" #define VERSION "1.10" #define restrict __restrict #define test_MODULES ":module" #define ucm_MODULES "" #define uct_MODULES ":ib:rdmacm:cma" #define uct_cuda_MODULES "" #define uct_ib_MODULES "" #define uct_rocm_MODULES "" #define ucx_perftest_MODULES "" [1642057062.145220] [n-62-8-1:164377:0] module.c:69 UCX DEBUG ucs library path: /dtu/spack/ucx-1.10.0/lib/libucs.so.0 [1642057062.145232] [n-62-8-1:164377:0] module.c:251 UCX DEBUG loading modules for uct [1642057062.146153] [n-62-8-1:164377:0] module.c:180 UCX TRACE loaded /dtu/spack/ucx-1.10.0/lib/ucx/libuct_ib.so.0.0.0 [0x34e80ac0] [1642057062.146161] [n-62-8-1:164377:0] module.c:186 UCX TRACE not calling constructor 'ucs_module_global_init' in /dtu/spack/ucx-1.10.0/lib/ucx/libuct_ib.so.0 [1642057062.146502] [n-62-8-1:164377:0] module.c:180 UCX TRACE loaded /dtu/spack/ucx-1.10.0/lib/ucx/libuct_rdmacm.so.0.0.0 [0x34e83330] [1642057062.146510] [n-62-8-1:164377:0] module.c:186 UCX TRACE not calling constructor 'ucs_module_global_init' in /dtu/spack/ucx-1.10.0/lib/ucx/libuct_rdmacm.so.0 [1642057062.146673] [n-62-8-1:164377:0] module.c:180 UCX TRACE loaded /dtu/spack/ucx-1.10.0/lib/ucx/libuct_cma.so.0.0.0 [0x34e84020] [1642057062.146680] [n-62-8-1:164377:0] module.c:186 UCX TRACE not calling constructor 'ucs_module_global_init' in /dtu/spack/ucx-1.10.0/lib/ucx/libuct_cma.so.0 # # Memory domain: posix # Component: posix # allocate: unlimited # remote key: 24 bytes # rkey_ptr is supported # # Transport: posix # Device: memory # System device: [1642057062.146804] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method md [1642057062.147004] [n-62-8-1:164377:0] sys.c:652 UCX TRACE detected huge page size: 536870912 [1642057062.147015] [n-62-8-1:164377:0] mm_posix.c:507 UCX DEBUG allocated posix shared memory at 0xffff8b1a0000 length 65536 [1642057062.147019] [n-62-8-1:164377:0] uct_mem.c:294 UCX TRACE allocated 65536 bytes at 0xffff8b1a0000 using posix [1642057062.147045] [n-62-8-1:164377:0] time.c:22 UCX DEBUG measured arch clock speed: 200000000.00 Hz [1642057062.147074] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool mm_recv_desc: align 64, maxelems 4294967295, elemsize 8288 [1642057062.147080] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method md [1642057062.147761] [n-62-8-1:164377:0] mm_posix.c:507 UCX DEBUG allocated posix shared memory at 0xffff8ad80000 length 4325376 [1642057062.147765] [n-62-8-1:164377:0] uct_mem.c:294 UCX TRACE allocated 4325376 bytes at 0xffff8ad80000 using posix [1642057062.147772] [n-62-8-1:164377:0] mpool.c:206 UCX DEBUG mpool mm_recv_desc: allocated chunk 0xffff8ad80018 of 4325352 bytes with 519 elements [1642057062.147837] [n-62-8-1:164377:0] mm_iface.c:554 UCX DEBUG created mm iface 0x34e86ab0 FIFO id 0xc0000000c0028219 va 0xffff8b1a0000 size 65536 (128 x 64 elems) # # capabilities: # bandwidth: 0.00/ppn + 12179.00 MB/sec # latency: 80 nsec # overhead: 10 nsec # put_short: <= 4294967295 # put_bcopy: unlimited # get_bcopy: unlimited # am_short: <= 100 # am_bcopy: <= 8256 # domain: cpu # atomic_add: 32, 64 bit # atomic_and: 32, 64 bit # atomic_or: 32, 64 bit # atomic_xor: 32, 64 bit # atomic_fadd: 32, 64 bit # atomic_fand: 32, 64 bit # atomic_for: 32, 64 bit # atomic_fxor: 32, 64 bit # atomic_swap: 32, 64 bit # atomic_cswap: 32, 64 bit # connection: to iface # device priority: 0 # device num paths: 1 # max eps: inf # device address: 8 bytes # iface address: 8 bytes # error handling: none [1642057062.148027] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool mm_recv_desc destroyed # # # Memory domain: sysv # Component: sysv # allocate: unlimited # remote key: 12 bytes # rkey_ptr is supported # # Transport: sysv # Device: memory # System device: [1642057062.148103] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method md [1642057062.148109] [n-62-8-1:164377:0] mm_sysv.c:88 UCX DEBUG mm failed to allocate 8447 bytes with hugetlb [1642057062.148141] [n-62-8-1:164377:0] uct_mem.c:294 UCX TRACE allocated 65536 bytes at 0xffff8b1a0000 using sysv [1642057062.148172] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool mm_recv_desc: align 64, maxelems 4294967295, elemsize 8288 [1642057062.148175] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method md [1642057062.148178] [n-62-8-1:164377:0] mm_sysv.c:88 UCX DEBUG mm failed to allocate 4259952 bytes with hugetlb [1642057062.148192] [n-62-8-1:164377:0] uct_mem.c:294 UCX TRACE allocated 4325376 bytes at 0xffff8ad80000 using sysv [1642057062.148205] [n-62-8-1:164377:0] mpool.c:206 UCX DEBUG mpool mm_recv_desc: allocated chunk 0xffff8ad80018 of 4325352 bytes with 519 elements [1642057062.148665] [n-62-8-1:164377:0] mm_iface.c:554 UCX DEBUG created mm iface 0x34e86a50 FIFO id 0x8006 va 0xffff8b1a0000 size 65536 (128 x 64 elems) # # capabilities: # bandwidth: 0.00/ppn + 12179.00 MB/sec # latency: 80 nsec # overhead: 10 nsec # put_short: <= 4294967295 # put_bcopy: unlimited # get_bcopy: unlimited # am_short: <= 100 # am_bcopy: <= 8256 # domain: cpu # atomic_add: 32, 64 bit # atomic_and: 32, 64 bit # atomic_or: 32, 64 bit # atomic_xor: 32, 64 bit # atomic_fadd: 32, 64 bit # atomic_fand: 32, 64 bit # atomic_for: 32, 64 bit # atomic_fxor: 32, 64 bit # atomic_swap: 32, 64 bit # atomic_cswap: 32, 64 bit # connection: to iface # device priority: 0 # device num paths: 1 # max eps: inf # device address: 8 bytes # iface address: 8 bytes # error handling: none [1642057062.148798] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool mm_recv_desc destroyed # # # Memory domain: self # Component: self # register: unlimited, cost: 0 nsec # remote key: 0 bytes # # Transport: self # Device: memory # System device: [1642057062.148848] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool self_msg_desc: align 64, maxelems 4294967295, elemsize 8200 [1642057062.148851] [n-62-8-1:164377:0] self.c:202 UCX DEBUG created self iface id 0x7edd6985e72f95d3 send_size 8192 # # capabilities: # bandwidth: 0.00/ppn + 6911.00 MB/sec # latency: 0 nsec # overhead: 10 nsec # put_short: <= 4294967295 # put_bcopy: unlimited # get_bcopy: unlimited # am_short: <= 8K # am_bcopy: <= 8K # domain: cpu # atomic_add: 32, 64 bit # atomic_and: 32, 64 bit # atomic_or: 32, 64 bit # atomic_xor: 32, 64 bit # atomic_fadd: 32, 64 bit # atomic_fand: 32, 64 bit # atomic_for: 32, 64 bit # atomic_fxor: 32, 64 bit # atomic_swap: 32, 64 bit # atomic_cswap: 32, 64 bit # connection: to iface # device priority: 0 # device num paths: 1 # max eps: inf # device address: 0 bytes # iface address: 8 bytes # error handling: none [1642057062.148873] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool self_msg_desc destroyed # [1642057062.148926] [n-62-8-1:164377:0] sock.c:87 UCX DEBUG ioctl(req=35093, ifr_name=enp11s0f1) failed: Cannot assign requested address [1642057062.148949] [n-62-8-1:164377:0] sock.c:87 UCX DEBUG ioctl(req=35093, ifr_name=ib0) failed: Cannot assign requested address [1642057062.148962] [n-62-8-1:164377:0] sock.c:87 UCX DEBUG ioctl(req=35093, ifr_name=ib1) failed: Cannot assign requested address # # Memory domain: tcp # Component: tcp # register: unlimited, cost: 0 nsec # remote key: 0 bytes # # Transport: tcp # Device: lo # System device: [1642057062.149057] [n-62-8-1:164377:0] tcp_iface.c:490 UCX DEBUG using TCP port range: 0-0 [1642057062.149062] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool uct_tcp_iface_tx_buf_mp: align 64, maxelems 4294967295, elemsize 8205 [1642057062.149065] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool uct_tcp_iface_rx_buf_mp: align 64, maxelems 4294967295, elemsize 131090 [1642057062.150964] [n-62-8-1:164377:0] async.c:231 UCX DEBUG added async handler 0x34e871c0 [id=4 ref 1] uct_tcp_iface_connect_handler() to hash [1642057062.151097] [n-62-8-1:164377:0] async.c:504 UCX DEBUG listening to async event fd 4 events 0x5 mode thread_spinlock [1642057062.151108] [n-62-8-1:164377:0] tcp_iface.c:440 UCX DEBUG tcp_iface 0x34e86af0: listening for connections (fd=4) on 127.0.0.1:49105 # # capabilities: [1642057062.151128] [n-62-8-1:164377:0] sock.c:87 UCX DEBUG ioctl(req=35142, ifr_name=lo) failed: Operation not supported [1642057062.151140] [n-62-8-1:164377:0] tcp_net.c:60 UCX DEBUG speed of lo is UNKNOWN, assuming 100 Mbps # bandwidth: 11.91/ppn + 0.00 MB/sec # latency: 10960 nsec # overhead: 50000 nsec # put_zcopy: <= 18446744073709551590, up to 6 iov # put_opt_zcopy_align: <= 1 # put_align_mtu: <= 0 # am_short: <= 8K # am_bcopy: <= 8K # am_zcopy: <= 64K, up to 6 iov # am_opt_zcopy_align: <= 1 # am_align_mtu: <= 0 # am header: <= 8037 # connection: to ep, to iface # device priority: 1 # device num paths: 1 # max eps: 256 # device address: 16 bytes # iface address: 2 bytes # ep address: 10 bytes # error handling: peer failure [1642057062.151259] [n-62-8-1:164377:0] tcp_iface.c:699 UCX DEBUG tcp_iface 0x34e86af0: destroying [1642057062.151266] [n-62-8-1:164377:0] async.c:156 UCX DEBUG removed async handler 0x34e871c0 [id=4 ref 1] uct_tcp_iface_connect_handler() from hash [1642057062.151270] [n-62-8-1:164377:0] async.c:557 UCX DEBUG removing async handler 0x34e871c0 [id=4 ref 1] uct_tcp_iface_connect_handler() [1642057062.151366] [n-62-8-1:164377:0] async.c:577 UCX TRACE waiting for 0x34e871c0 [id=4 ref 1] uct_tcp_iface_connect_handler() completion (called=0) [1642057062.151373] [n-62-8-1:164377:0] async.c:171 UCX DEBUG release async handler 0x34e871c0 [id=4 ref 0] uct_tcp_iface_connect_handler() [1642057062.151379] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool uct_tcp_iface_rx_buf_mp destroyed [1642057062.151381] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool uct_tcp_iface_tx_buf_mp destroyed # # Transport: tcp # Device: enp11s0f0 # System device: [1642057062.151433] [n-62-8-1:164377:0] tcp_iface.c:490 UCX DEBUG using TCP port range: 0-0 [1642057062.151438] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool uct_tcp_iface_tx_buf_mp: align 64, maxelems 4294967295, elemsize 8205 [1642057062.151441] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool uct_tcp_iface_rx_buf_mp: align 64, maxelems 4294967295, elemsize 131090 [1642057062.151489] [n-62-8-1:164377:0] async.c:231 UCX DEBUG added async handler 0x34e872f0 [id=4 ref 1] uct_tcp_iface_connect_handler() to hash [1642057062.151574] [n-62-8-1:164377:0] async.c:504 UCX DEBUG listening to async event fd 4 events 0x5 mode thread_spinlock [1642057062.151581] [n-62-8-1:164377:0] tcp_iface.c:440 UCX DEBUG tcp_iface 0x34e86a30: listening for connections (fd=4) on 10.66.8.1:60489 # # capabilities: # bandwidth: 113.16/ppn + 0.00 MB/sec # latency: 5776 nsec # overhead: 50000 nsec # put_zcopy: <= 18446744073709551590, up to 6 iov # put_opt_zcopy_align: <= 1 # put_align_mtu: <= 0 # am_short: <= 8K # am_bcopy: <= 8K # am_zcopy: <= 64K, up to 6 iov # am_opt_zcopy_align: <= 1 # am_align_mtu: <= 0 # am header: <= 8037 # connection: to ep, to iface # device priority: 0 # device num paths: 1 # max eps: 256 # device address: 16 bytes # iface address: 2 bytes # ep address: 10 bytes # error handling: peer failure [1642057062.152434] [n-62-8-1:164377:0] tcp_iface.c:699 UCX DEBUG tcp_iface 0x34e86a30: destroying [1642057062.152440] [n-62-8-1:164377:0] async.c:156 UCX DEBUG removed async handler 0x34e872f0 [id=4 ref 1] uct_tcp_iface_connect_handler() from hash [1642057062.152443] [n-62-8-1:164377:0] async.c:557 UCX DEBUG removing async handler 0x34e872f0 [id=4 ref 1] uct_tcp_iface_connect_handler() [1642057062.152497] [n-62-8-1:164377:0] async.c:577 UCX TRACE waiting for 0x34e872f0 [id=4 ref 1] uct_tcp_iface_connect_handler() completion (called=0) [1642057062.152501] [n-62-8-1:164377:0] async.c:171 UCX DEBUG release async handler 0x34e872f0 [id=4 ref 0] uct_tcp_iface_connect_handler() [1642057062.152503] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool uct_tcp_iface_rx_buf_mp destroyed [1642057062.152506] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool uct_tcp_iface_tx_buf_mp destroyed # [1642057062.152536] [n-62-8-1:164377:0] tcp_sockcm.c:189 UCX DEBUG created tcp_sockcm 0x34e86680 # # Connection manager: tcp # max_conn_priv: 2032 bytes # # Memory domain: sockcm # Component: sockcm # supports client-server connection establishment via sockaddr # < no supported devices found > [1642057062.152556] [n-62-8-1:164377:0] module.c:251 UCX DEBUG loading modules for uct_ib [1642057062.153446] [n-62-8-1:164377:0] ib_md.c:1488 UCX TRACE opening IB device mlx5_2 [1642057062.157749] [n-62-8-1:164377:0] ib_mlx5dv_md.c:607 UCX DEBUG mlx5dv_open_device(mlx5_2) failed: Bad file descriptor [1642057062.157755] [n-62-8-1:164377:0] ib_md.c:1548 UCX DEBUG mlx5_2: md open by 'uct_ib_mlx5_devx_md_ops' failed, trying next [1642057062.159665] [n-62-8-1:164377:0] ib_device.c:496 UCX DEBUG mlx5_2 vendor_id: 0x15b3 device_id: 4117 [1642057062.159671] [n-62-8-1:164377:0] ib_mlx5dv_md.c:816 UCX DEBUG checking for DC support on mlx5_2 [1642057062.160880] [n-62-8-1:164377:0] ib_mlx5dv_md.c:854 UCX DEBUG failed to create DCT on mlx5_2: Operation not supported [1642057062.164012] [n-62-8-1:164377:0] async.c:231 UCX DEBUG added async handler 0x34e872f0 [id=4 ref 1] uct_ib_async_event_handler() to hash [1642057062.164089] [n-62-8-1:164377:0] async.c:504 UCX DEBUG listening to async event fd 4 events 0x1 mode thread_spinlock [1642057062.164099] [n-62-8-1:164377:0] ib_device.c:593 UCX DEBUG initialized device 'mlx5_2' (InfiniBand channel adapter) with 1 ports [1642057062.164187] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_2: cuda GPUDirect RDMA is disabled [1642057062.164198] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_2: rocm GPUDirect RDMA is disabled [1642057062.164212] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rcache_mp: align 8, maxelems 4294967295, elemsize 144 [1642057062.164621] [n-62-8-1:164377:0] module.c:251 UCX DEBUG loading modules for ucm [1642057062.164630] [n-62-8-1:164377:0] ib_md.c:1257 UCX DEBUG mlx5_2: using registration cache [1642057062.164721] [n-62-8-1:164377:0] ib_md.c:1453 UCX TRACE mlx5_2: pcie 8x gen3, effective throughput 6961.862MB/s (54.390Gb/s) [1642057062.164725] [n-62-8-1:164377:0] ib_md.c:1541 UCX DEBUG mlx5_2: md open by 'uct_ib_mlx5_md_ops' is successful [1642057062.164733] [n-62-8-1:164377:0] ib_device.c:692 UCX TRACE mlx5_2:1 is not active (state: 1) [1642057062.164737] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_2:1 does not support flags 0x0: Destination is unreachable [1642057062.164769] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0x0 [1642057062.164774] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query rc_verbs resources: No such device [1642057062.164781] [n-62-8-1:164377:0] ib_device.c:692 UCX TRACE mlx5_2:1 is not active (state: 1) [1642057062.164784] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_2:1 does not support flags 0x4: Destination is unreachable [1642057062.164786] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0x4 [1642057062.164788] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query rc_mlx5 resources: No such device [1642057062.164792] [n-62-8-1:164377:0] ib_device.c:692 UCX TRACE mlx5_2:1 is not active (state: 1) [1642057062.164795] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_2:1 does not support flags 0xc4: Destination is unreachable [1642057062.164797] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0xc4 [1642057062.164799] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query dc_mlx5 resources: No such device [1642057062.164802] [n-62-8-1:164377:0] ib_device.c:692 UCX TRACE mlx5_2:1 is not active (state: 1) [1642057062.164804] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_2:1 does not support flags 0x0: Destination is unreachable [1642057062.164806] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0x0 [1642057062.164808] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query ud_verbs resources: No such device [1642057062.164811] [n-62-8-1:164377:0] ib_device.c:692 UCX TRACE mlx5_2:1 is not active (state: 1) [1642057062.164813] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_2:1 does not support flags 0x4: Destination is unreachable [1642057062.164815] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0x4 [1642057062.164816] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query ud_mlx5 resources: No such device [1642057062.164828] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_2: cuda GPUDirect RDMA is disabled [1642057062.164836] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_2: rocm GPUDirect RDMA is disabled # # Memory domain: mlx5_2 # Component: ib # register: unlimited, cost: 180 nsec # remote key: 8 bytes # local memory handle is required for zcopy # < no supported devices found > [1642057062.164854] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rcache_mp destroyed [1642057062.164931] [n-62-8-1:164377:0] ib_device.c:611 UCX DEBUG destroying ib device mlx5_2 [1642057062.164938] [n-62-8-1:164377:0] async.c:156 UCX DEBUG removed async handler 0x34e872f0 [id=4 ref 1] uct_ib_async_event_handler() from hash [1642057062.164942] [n-62-8-1:164377:0] async.c:557 UCX DEBUG removing async handler 0x34e872f0 [id=4 ref 1] uct_ib_async_event_handler() [1642057062.164983] [n-62-8-1:164377:0] async.c:577 UCX TRACE waiting for 0x34e872f0 [id=4 ref 1] uct_ib_async_event_handler() completion (called=0) [1642057062.164986] [n-62-8-1:164377:0] async.c:171 UCX DEBUG release async handler 0x34e872f0 [id=4 ref 0] uct_ib_async_event_handler() [1642057062.165230] [n-62-8-1:164377:0] ib_md.c:1488 UCX TRACE opening IB device mlx5_0 [1642057062.169205] [n-62-8-1:164377:0] ib_mlx5dv_md.c:607 UCX DEBUG mlx5dv_open_device(mlx5_0) failed: Bad file descriptor [1642057062.169217] [n-62-8-1:164377:0] ib_md.c:1548 UCX DEBUG mlx5_0: md open by 'uct_ib_mlx5_devx_md_ops' failed, trying next [1642057062.171918] [n-62-8-1:164377:0] ib_device.c:496 UCX DEBUG mlx5_0 vendor_id: 0x15b3 device_id: 4119 [1642057062.171924] [n-62-8-1:164377:0] ib_mlx5dv_md.c:816 UCX DEBUG checking for DC support on mlx5_0 [1642057062.173598] [n-62-8-1:164377:0] ib_mlx5dv_md.c:890 UCX DEBUG DC is supported on mlx5_0 [1642057062.175094] [n-62-8-1:164377:0] async.c:231 UCX DEBUG added async handler 0x34e89540 [id=4 ref 1] uct_ib_async_event_handler() to hash [1642057062.175186] [n-62-8-1:164377:0] async.c:504 UCX DEBUG listening to async event fd 4 events 0x1 mode thread_spinlock [1642057062.175191] [n-62-8-1:164377:0] ib_device.c:593 UCX DEBUG initialized device 'mlx5_0' (InfiniBand channel adapter) with 1 ports [1642057062.175280] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.175290] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.175302] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rcache_mp: align 8, maxelems 4294967295, elemsize 144 [1642057062.175399] [n-62-8-1:164377:0] ib_md.c:1257 UCX DEBUG mlx5_0: using registration cache [1642057062.175489] [n-62-8-1:164377:0] ib_md.c:1453 UCX TRACE mlx5_0: pcie 8x gen3, effective throughput 6961.862MB/s (54.390Gb/s) [1642057062.175493] [n-62-8-1:164377:0] ib_md.c:1541 UCX DEBUG mlx5_0: md open by 'uct_ib_mlx5_md_ops' is successful [1642057062.175564] [n-62-8-1:164377:0] topo.c:90 UCX DEBUG bus id 0x50000 doesn't exist. sys_dev = 0 [1642057062.175569] [n-62-8-1:164377:0] ib_device.c:1058 UCX DEBUG mlx5_0 bus id 0:5:0.0 sys_dev 0 [1642057062.175607] [n-62-8-1:164377:0] topo.c:82 UCX DEBUG bus id 0x50000 exists. sys_dev = 0 [1642057062.175610] [n-62-8-1:164377:0] ib_device.c:1058 UCX DEBUG mlx5_0 bus id 0:5:0.0 sys_dev 0 [1642057062.175644] [n-62-8-1:164377:0] topo.c:82 UCX DEBUG bus id 0x50000 exists. sys_dev = 0 [1642057062.175647] [n-62-8-1:164377:0] ib_device.c:1058 UCX DEBUG mlx5_0 bus id 0:5:0.0 sys_dev 0 [1642057062.175680] [n-62-8-1:164377:0] topo.c:82 UCX DEBUG bus id 0x50000 exists. sys_dev = 0 [1642057062.175683] [n-62-8-1:164377:0] ib_device.c:1058 UCX DEBUG mlx5_0 bus id 0:5:0.0 sys_dev 0 [1642057062.175715] [n-62-8-1:164377:0] topo.c:82 UCX DEBUG bus id 0x50000 exists. sys_dev = 0 [1642057062.175717] [n-62-8-1:164377:0] ib_device.c:1058 UCX DEBUG mlx5_0 bus id 0:5:0.0 sys_dev 0 [1642057062.175728] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.175735] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled # # Memory domain: mlx5_0 # Component: ib # register: unlimited, cost: 180 nsec # remote key: 8 bytes # local memory handle is required for zcopy # [1642057062.175760] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.175767] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled # Transport: rc_verbs # Device: mlx5_0:1 # System device: 0000:05:00.0 (0) [1642057062.175867] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.175875] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.176035] [n-62-8-1:164377:0] ib_iface.c:795 UCX DEBUG using pkey[0] 0xffff on mlx5_0:1 [1642057062.176873] [n-62-8-1:164377:0] ib_iface.c:1346 UCX DEBUG created uct_ib_iface_t headroom_ofs 12 payload_ofs 12 hdr_ofs 11 data_sz 8256 [1642057062.176903] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_recv_desc: align 64, maxelems 4294967295, elemsize 8276 [1642057062.176907] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_send_desc: align 64, maxelems 4294967295, elemsize 8320 [1642057062.176953] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool send-ops-mpool: align 64, maxelems 4294967295, elemsize 48 [1642057062.177777] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool pending-ops: align 1, maxelems 4294967295, elemsize 64 [1642057062.177783] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_verbs_short_desc: align 64, maxelems 4294967295, elemsize 192 [1642057062.178148] [n-62-8-1:164377:0] ib_iface.c:938 UCX DEBUG iface=0x34eda7c0: created RC QP 0xf5 on mlx5_0:1 TX wr:409 sge:3 inl:124 resp:64 RX wr:0 sge:0 resp:64 # # capabilities: # bandwidth: 6961.86/ppn + 0.00 MB/sec # latency: 600 + 1.000 * N nsec # overhead: 75 nsec # put_short: <= 124 # put_bcopy: <= 8256 # put_zcopy: <= 1G, up to 3 iov # put_opt_zcopy_align: <= 512 # put_align_mtu: <= 4K # get_bcopy: <= 8256 # get_zcopy: 65..1G, up to 3 iov # get_opt_zcopy_align: <= 512 # get_align_mtu: <= 4K # am_short: <= 123 # am_bcopy: <= 8255 # am_zcopy: <= 8255, up to 2 iov # am_opt_zcopy_align: <= 512 # am_align_mtu: <= 4K # am header: <= 127 # domain: device # atomic_add: 64 bit # atomic_fadd: 64 bit # atomic_cswap: 64 bit # connection: to ep # device priority: 38 # device num paths: 1 # max eps: 256 # device address: 3 bytes # ep address: 16 bytes # error handling: peer failure [1642057062.178845] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_verbs_short_desc destroyed [1642057062.179449] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool send-ops-mpool destroyed [1642057062.179453] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_send_desc destroyed [1642057062.179455] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_recv_desc destroyed [1642057062.179457] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool pending-ops destroyed # # [1642057062.179857] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.179866] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled # Transport: rc_mlx5 # Device: mlx5_0:1 # System device: 0000:05:00.0 (0) [1642057062.179968] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.179976] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.180078] [n-62-8-1:164377:0] ib_iface.c:795 UCX DEBUG using pkey[0] 0xffff on mlx5_0:1 [1642057062.180159] [n-62-8-1:164377:0] ib_device.c:1315 UCX DEBUG arm_board_vendor is 'HPE [1642057062.180159] [n-62-8-1:164377:0] ib_device.c:1315 UCX DEBUG ' [1642057062.180167] [n-62-8-1:164377:0] ib_device.c:1325 UCX DEBUG max IB CQE size is 128 [1642057062.181012] [n-62-8-1:164377:0] ib_iface.c:1346 UCX DEBUG created uct_ib_iface_t headroom_ofs 12 payload_ofs 12 hdr_ofs 10 data_sz 8256 [1642057062.181021] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_recv_desc: align 64, maxelems 4294967295, elemsize 8276 [1642057062.181025] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_send_desc: align 64, maxelems 4294967295, elemsize 8320 [1642057062.181064] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool send-ops-mpool: align 64, maxelems 4294967295, elemsize 48 [1642057062.181895] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool pending-ops: align 1, maxelems 4294967295, elemsize 64 [1642057062.181912] [n-62-8-1:164377:0] ib_mlx5.c:825 UCX DEBUG SL=0 (AR support - unknown) was selected on mlx5_0:1, SLs with AR support = { }, SLs without AR support = { } [1642057062.182396] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool mlx5_dm_desc: align 64, maxelems 1, elemsize 64 [1642057062.182402] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_mlx5_atomic_desc: align 64, maxelems 4294967295, elemsize 72 # # capabilities: # bandwidth: 6961.86/ppn + 0.00 MB/sec # latency: 600 + 1.000 * N nsec # overhead: 40 nsec # put_short: <= 2K # put_bcopy: <= 8256 # put_zcopy: <= 1G, up to 14 iov # put_opt_zcopy_align: <= 512 # put_align_mtu: <= 4K # get_bcopy: <= 8256 # get_zcopy: 65..1G, up to 14 iov # get_opt_zcopy_align: <= 512 # get_align_mtu: <= 4K # am_short: <= 2046 # am_bcopy: <= 8254 # am_zcopy: <= 8254, up to 3 iov # am_opt_zcopy_align: <= 512 # am_align_mtu: <= 4K # am header: <= 186 # domain: device # atomic_add: 64 bit # atomic_fadd: 64 bit # atomic_cswap: 64 bit # connection: to ep # device priority: 38 # device num paths: 1 # max eps: 256 # device address: 3 bytes # ep address: 7 bytes # error handling: buffer (zcopy), remote access, peer failure [1642057062.182450] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_mlx5_atomic_desc destroyed [1642057062.182454] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool mlx5_dm_desc destroyed [1642057062.183353] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool send-ops-mpool destroyed [1642057062.183357] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_send_desc destroyed [1642057062.183359] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_recv_desc destroyed [1642057062.183361] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool pending-ops destroyed # # [1642057062.183835] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.183844] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled # Transport: dc_mlx5 # Device: mlx5_0:1 # System device: 0000:05:00.0 (0) [1642057062.183942] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.183949] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.184037] [n-62-8-1:164377:0] ib_iface.c:795 UCX DEBUG using pkey[0] 0xffff on mlx5_0:1 [1642057062.184863] [n-62-8-1:164377:0] ib_iface.c:1346 UCX DEBUG created uct_ib_iface_t headroom_ofs 12 payload_ofs 12 hdr_ofs 10 data_sz 8256 [1642057062.184873] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_recv_desc: align 64, maxelems 4294967295, elemsize 8276 [1642057062.184876] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_send_desc: align 64, maxelems 4294967295, elemsize 8320 [1642057062.184904] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool send-ops-mpool: align 64, maxelems 4294967295, elemsize 48 [1642057062.185697] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool pending-ops: align 1, maxelems 4294967295, elemsize 104 [1642057062.185704] [n-62-8-1:164377:0] ib_mlx5.c:825 UCX DEBUG SL=0 (AR support - unknown) was selected on mlx5_0:1, SLs with AR support = { }, SLs without AR support = { } [1642057062.186010] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool mlx5_dm_desc: align 64, maxelems 1, elemsize 64 [1642057062.186014] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_mlx5_atomic_desc: align 64, maxelems 4294967295, elemsize 72 [1642057062.186274] [n-62-8-1:164377:0] dc_mlx5.c:766 UCX DEBUG creating 8 dci(s) [1642057062.186823] [n-62-8-1:164377:0] ib_mlx5.c:547 UCX DEBUG tx wq 65536 bytes [bb=64, nwqe=1024] mmio_mode db [1642057062.187300] [n-62-8-1:164377:0] ib_mlx5.c:547 UCX DEBUG tx wq 65536 bytes [bb=64, nwqe=1024] mmio_mode db [1642057062.187768] [n-62-8-1:164377:0] ib_mlx5.c:547 UCX DEBUG tx wq 65536 bytes [bb=64, nwqe=1024] mmio_mode db [1642057062.188248] [n-62-8-1:164377:0] ib_mlx5.c:547 UCX DEBUG tx wq 65536 bytes [bb=64, nwqe=1024] mmio_mode db [1642057062.188707] [n-62-8-1:164377:0] ib_mlx5.c:547 UCX DEBUG tx wq 65536 bytes [bb=64, nwqe=1024] mmio_mode db [1642057062.189195] [n-62-8-1:164377:0] ib_mlx5.c:547 UCX DEBUG tx wq 65536 bytes [bb=64, nwqe=1024] mmio_mode db [1642057062.189701] [n-62-8-1:164377:0] ib_mlx5.c:547 UCX DEBUG tx wq 65536 bytes [bb=64, nwqe=1024] mmio_mode db [1642057062.190195] [n-62-8-1:164377:0] ib_mlx5.c:547 UCX DEBUG tx wq 65536 bytes [bb=64, nwqe=1024] mmio_mode db [1642057062.190205] [n-62-8-1:164377:0] dc_mlx5.c:1214 UCX DEBUG dc iface 0x34eda7c0: using 'dcs_quota' policy with 8 dcis and 4608 cqes, dct 0x13f9 [1642057062.190222] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method huge [1642057062.190227] [n-62-8-1:164377:0] uct_mem.c:275 UCX TRACE failed to allocate 37481712 bytes from hugetlb: User-defined limit was reached [1642057062.190229] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method thp [1642057062.190261] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method md [1642057062.190272] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.190280] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.190284] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method mmap [1642057062.190296] [n-62-8-1:164377:0] uct_mem.c:294 UCX TRACE allocated 37486592 bytes at 0xffff883b0000 using mmap [1642057062.190304] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.190310] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.190343] [n-62-8-1:164377:0] mpool.c:206 UCX DEBUG mpool rcache_mp: allocated chunk 0xffff88380008 of 196600 bytes with 1365 elements [1642057062.194314] [n-62-8-1:164377:0] ib_md.c:779 UCX DEBUG registered memory 0xffff883b0000..0xffff8a770000 on mlx5_0 lkey 0x6c5d rkey 0x6c5d access 0xf flags 0x3e4 [1642057062.194327] [n-62-8-1:164377:0] rcache.c:764 UCX TRACE mlx5_0: created region 0x3501f720 [0xffff883b0000..0xffff8a770000] gt rw ref 2 lkey 0x6c5d rkey 0x6c5d atomic_rkey 0xffffffff [1642057062.194331] [n-62-8-1:164377:0] mpool.c:206 UCX DEBUG mpool rc_recv_desc: allocated chunk 0xffff883b0018 of 37486568 bytes with 4505 elements [1642057062.194510] [n-62-8-1:164377:0] dc_mlx5.c:1233 UCX DEBUG created dc iface 0x34eda7c0 # # capabilities: # bandwidth: 6961.86/ppn + 0.00 MB/sec # latency: 660 nsec # overhead: 40 nsec # put_short: <= 2K # put_bcopy: <= 8256 # put_zcopy: <= 1G, up to 11 iov # put_opt_zcopy_align: <= 512 # put_align_mtu: <= 4K # get_bcopy: <= 8256 # get_zcopy: 65..1G, up to 11 iov # get_opt_zcopy_align: <= 512 # get_align_mtu: <= 4K # am_short: <= 2046 # am_bcopy: <= 8254 # am_zcopy: <= 8254, up to 3 iov # am_opt_zcopy_align: <= 512 # am_align_mtu: <= 4K # am header: <= 138 # domain: device # atomic_add: 64 bit # atomic_fadd: 64 bit # atomic_cswap: 64 bit # connection: to iface # device priority: 38 # device num paths: 1 # max eps: inf # device address: 3 bytes # iface address: 5 bytes # error handling: buffer (zcopy), remote access, peer failure [1642057062.199911] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_mlx5_atomic_desc destroyed [1642057062.199916] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool mlx5_dm_desc destroyed [1642057062.200865] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool send-ops-mpool destroyed [1642057062.200869] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_send_desc destroyed [1642057062.200890] [n-62-8-1:164377:0] rcache.c:323 UCX TRACE mlx5_0: put region, flags 0x1 region 0x3501f720 [0xffff883b0000..0xffff8a770000] gt rw ref 2 lkey 0x6c5d rkey 0x6c5d atomic_rkey 0xffffffff [1642057062.200901] [n-62-8-1:164377:0] rcache.c:359 UCX TRACE mlx5_0: invalidate region 0x3501f720 [0xffff883b0000..0xffff8a770000] gt rw ref 1 lkey 0x6c5d rkey 0x6c5d atomic_rkey 0xffffffff [1642057062.200910] [n-62-8-1:164377:0] rcache.c:323 UCX TRACE mlx5_0: put region, flags 0xa region 0x3501f720 [0xffff883b0000..0xffff8a770000] g- rw ref 1 lkey 0x6c5d rkey 0x6c5d atomic_rkey 0xffffffff [1642057062.200914] [n-62-8-1:164377:0] rcache.c:334 UCX TRACE mlx5_0: put on GC list region 0x3501f720 [0xffff883b0000..0xffff8a770000] g- rw ref 0 lkey 0x6c5d rkey 0x6c5d atomic_rkey 0xffffffff [1642057062.201072] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_recv_desc destroyed [1642057062.201075] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool pending-ops destroyed # # [1642057062.201609] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.201618] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled # Transport: ud_verbs # Device: mlx5_0:1 # System device: 0000:05:00.0 (0) [1642057062.201680] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.201688] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.201805] [n-62-8-1:164377:0] ib_iface.c:795 UCX DEBUG using pkey[0] 0xffff on mlx5_0:1 [1642057062.202526] [n-62-8-1:164377:0] ib_iface.c:1346 UCX DEBUG created uct_ib_iface_t headroom_ofs 88 payload_ofs 88 hdr_ofs 40 data_sz 4096 [1642057062.202775] [n-62-8-1:164377:0] ib_iface.c:938 UCX DEBUG iface=0x34eda7c0: created UD QP 0xfe on mlx5_0:1 TX wr:341 sge:2 inl:124 resp:0 RX wr:4096 sge:1 resp:0 [1642057062.203089] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool ud_recv_skb: align 64, maxelems 4294967295, elemsize 4192 [1642057062.203094] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method huge [1642057062.203098] [n-62-8-1:164377:0] uct_mem.c:275 UCX TRACE failed to allocate 540784 bytes from hugetlb: User-defined limit was reached [1642057062.203100] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method thp [1642057062.203126] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method md [1642057062.203136] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.203143] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.203148] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method mmap [1642057062.203157] [n-62-8-1:164377:0] uct_mem.c:294 UCX TRACE allocated 589824 bytes at 0xffff8a6f0000 using mmap [1642057062.203164] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.203171] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.203180] [n-62-8-1:164377:0] rcache.c:295 UCX TRACE mlx5_0: destroy region 0x3501f720 [0xffff883b0000..0xffff8a770000] g- rw ref 0 lkey 0x6c5d rkey 0x6c5d atomic_rkey 0xffffffff [1642057062.203878] [n-62-8-1:164377:0] ib_md.c:779 UCX DEBUG registered memory 0xffff8a6f0000..0xffff8a780000 on mlx5_0 lkey 0xb8a7 rkey 0xb8a7 access 0xf flags 0x3e4 [1642057062.203883] [n-62-8-1:164377:0] rcache.c:764 UCX TRACE mlx5_0: created region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] gt rw ref 2 lkey 0xb8a7 rkey 0xb8a7 atomic_rkey 0xffffffff [1642057062.203887] [n-62-8-1:164377:0] mpool.c:206 UCX DEBUG mpool ud_recv_skb: allocated chunk 0xffff8a6f0018 of 589800 bytes with 139 elements [1642057062.203893] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool ud_tx_skb: align 64, maxelems 4294967295, elemsize 4168 [1642057062.203957] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80::b883:3ff:ff64:1408 to hash on device mlx5_0 port 1 index 0) [1642057062.204010] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 1) [1642057062.204075] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 2) [1642057062.204118] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 3) [1642057062.204160] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 4) [1642057062.204202] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 5) [1642057062.204243] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 6) [1642057062.204283] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 7) [1642057062.204424] [n-62-8-1:164377:0] timer_wheel.c:41 UCX DEBUG high res timer created log=19 resolution=2621.440000 usec wanted: 2500.000000 usec # # capabilities: # bandwidth: 6961.86/ppn + 0.00 MB/sec # latency: 630 nsec # overhead: 105 nsec # am_short: <= 116 # am_bcopy: <= 4088 # am_zcopy: <= 4088, up to 1 iov # am_opt_zcopy_align: <= 512 # am_align_mtu: <= 4K # am header: <= 3952 # connection: to ep, to iface # device priority: 38 # device num paths: 1 # max eps: inf # device address: 3 bytes # iface address: 3 bytes # ep address: 6 bytes # error handling: peer failure [1642057062.204465] [n-62-8-1:164377:0] ud_iface.c:600 UCX DEBUG iface(0x34eda7c0): cep cleanup [1642057062.204468] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool ud_tx_skb destroyed [1642057062.204473] [n-62-8-1:164377:0] rcache.c:323 UCX TRACE mlx5_0: put region, flags 0x1 region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] gt rw ref 2 lkey 0xb8a7 rkey 0xb8a7 atomic_rkey 0xffffffff [1642057062.204479] [n-62-8-1:164377:0] rcache.c:359 UCX TRACE mlx5_0: invalidate region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] gt rw ref 1 lkey 0xb8a7 rkey 0xb8a7 atomic_rkey 0xffffffff [1642057062.204484] [n-62-8-1:164377:0] rcache.c:323 UCX TRACE mlx5_0: put region, flags 0xa region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] g- rw ref 1 lkey 0xb8a7 rkey 0xb8a7 atomic_rkey 0xffffffff [1642057062.204488] [n-62-8-1:164377:0] rcache.c:334 UCX TRACE mlx5_0: put on GC list region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] g- rw ref 0 lkey 0xb8a7 rkey 0xb8a7 atomic_rkey 0xffffffff [1642057062.204504] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool ud_recv_skb destroyed [1642057062.205115] [n-62-8-1:164377:0] ud_iface.c:607 UCX DEBUG iface(0x34eda7c0): ptr_array cleanup # # [1642057062.205492] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.205501] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled # Transport: ud_mlx5 # Device: mlx5_0:1 # System device: 0000:05:00.0 (0) [1642057062.205565] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.205572] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.205661] [n-62-8-1:164377:0] ib_iface.c:795 UCX DEBUG using pkey[0] 0xffff on mlx5_0:1 [1642057062.206303] [n-62-8-1:164377:0] ib_iface.c:1346 UCX DEBUG created uct_ib_iface_t headroom_ofs 88 payload_ofs 88 hdr_ofs 40 data_sz 4096 [1642057062.206495] [n-62-8-1:164377:0] ib_iface.c:938 UCX DEBUG iface=0x34eda7c0: created UD QP 0xff on mlx5_0:1 TX wr:341 sge:2 inl:124 resp:0 RX wr:4096 sge:1 resp:0 [1642057062.206761] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool ud_recv_skb: align 64, maxelems 4294967295, elemsize 4192 [1642057062.206765] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method huge [1642057062.206768] [n-62-8-1:164377:0] uct_mem.c:275 UCX TRACE failed to allocate 540784 bytes from hugetlb: User-defined limit was reached [1642057062.206770] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method thp [1642057062.206793] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method md [1642057062.206802] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.206809] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.206814] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method mmap [1642057062.206822] [n-62-8-1:164377:0] uct_mem.c:294 UCX TRACE allocated 589824 bytes at 0xffff8a6f0000 using mmap [1642057062.206829] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: cuda GPUDirect RDMA is disabled [1642057062.206836] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_0: rocm GPUDirect RDMA is disabled [1642057062.206845] [n-62-8-1:164377:0] rcache.c:295 UCX TRACE mlx5_0: destroy region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] g- rw ref 0 lkey 0xb8a7 rkey 0xb8a7 atomic_rkey 0xffffffff [1642057062.206974] [n-62-8-1:164377:0] ib_md.c:779 UCX DEBUG registered memory 0xffff8a6f0000..0xffff8a780000 on mlx5_0 lkey 0xc5b6 rkey 0xc5b6 access 0xf flags 0x3e4 [1642057062.206980] [n-62-8-1:164377:0] rcache.c:764 UCX TRACE mlx5_0: created region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] gt rw ref 2 lkey 0xc5b6 rkey 0xc5b6 atomic_rkey 0xffffffff [1642057062.206984] [n-62-8-1:164377:0] mpool.c:206 UCX DEBUG mpool ud_recv_skb: allocated chunk 0xffff8a6f0018 of 589800 bytes with 139 elements [1642057062.206990] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool ud_tx_skb: align 64, maxelems 4294967295, elemsize 4168 [1642057062.207046] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80::b883:3ff:ff64:1408 to hash on device mlx5_0 port 1 index 0) [1642057062.207090] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 1) [1642057062.207130] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 2) [1642057062.207170] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 3) [1642057062.207209] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 4) [1642057062.207248] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 5) [1642057062.207287] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 6) [1642057062.207325] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80:: to hash on device mlx5_0 port 1 index 7) [1642057062.207332] [n-62-8-1:164377:0] ib_mlx5.c:825 UCX DEBUG SL=0 (AR support - unknown) was selected on mlx5_0:1, SLs with AR support = { }, SLs without AR support = { } [1642057062.207385] [n-62-8-1:164377:0] ib_mlx5.c:547 UCX DEBUG tx wq 65536 bytes [bb=64, nwqe=1024] mmio_mode db [1642057062.207416] [n-62-8-1:164377:0] timer_wheel.c:41 UCX DEBUG high res timer created log=19 resolution=2621.440000 usec wanted: 2500.000000 usec # # capabilities: # bandwidth: 6961.86/ppn + 0.00 MB/sec # latency: 630 nsec # overhead: 80 nsec # am_short: <= 180 # am_bcopy: <= 4088 # am_zcopy: <= 4088, up to 3 iov # am_opt_zcopy_align: <= 512 # am_align_mtu: <= 4K # am header: <= 132 # connection: to ep, to iface # device priority: 38 # device num paths: 1 # max eps: inf # device address: 3 bytes # iface address: 3 bytes # ep address: 6 bytes # error handling: peer failure [1642057062.207465] [n-62-8-1:164377:0] ud_iface.c:600 UCX DEBUG iface(0x34eda7c0): cep cleanup [1642057062.207468] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool ud_tx_skb destroyed [1642057062.207473] [n-62-8-1:164377:0] rcache.c:323 UCX TRACE mlx5_0: put region, flags 0x1 region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] gt rw ref 2 lkey 0xc5b6 rkey 0xc5b6 atomic_rkey 0xffffffff [1642057062.207479] [n-62-8-1:164377:0] rcache.c:359 UCX TRACE mlx5_0: invalidate region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] gt rw ref 1 lkey 0xc5b6 rkey 0xc5b6 atomic_rkey 0xffffffff [1642057062.207484] [n-62-8-1:164377:0] rcache.c:323 UCX TRACE mlx5_0: put region, flags 0xa region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] g- rw ref 1 lkey 0xc5b6 rkey 0xc5b6 atomic_rkey 0xffffffff [1642057062.207487] [n-62-8-1:164377:0] rcache.c:334 UCX TRACE mlx5_0: put on GC list region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] g- rw ref 0 lkey 0xc5b6 rkey 0xc5b6 atomic_rkey 0xffffffff [1642057062.207503] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool ud_recv_skb destroyed [1642057062.208118] [n-62-8-1:164377:0] ud_iface.c:607 UCX DEBUG iface(0x34eda7c0): ptr_array cleanup # [1642057062.208489] [n-62-8-1:164377:0] rcache.c:295 UCX TRACE mlx5_0: destroy region 0x3501f720 [0xffff8a6f0000..0xffff8a780000] g- rw ref 0 lkey 0xc5b6 rkey 0xc5b6 atomic_rkey 0xffffffff [1642057062.208558] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rcache_mp destroyed [1642057062.208637] [n-62-8-1:164377:0] ib_device.c:611 UCX DEBUG destroying ib device mlx5_0 [1642057062.208644] [n-62-8-1:164377:0] async.c:156 UCX DEBUG removed async handler 0x34e89540 [id=4 ref 1] uct_ib_async_event_handler() from hash [1642057062.208648] [n-62-8-1:164377:0] async.c:557 UCX DEBUG removing async handler 0x34e89540 [id=4 ref 1] uct_ib_async_event_handler() [1642057062.208705] [n-62-8-1:164377:0] async.c:577 UCX TRACE waiting for 0x34e89540 [id=4 ref 1] uct_ib_async_event_handler() completion (called=0) [1642057062.208708] [n-62-8-1:164377:0] async.c:171 UCX DEBUG release async handler 0x34e89540 [id=4 ref 0] uct_ib_async_event_handler() [1642057062.208929] [n-62-8-1:164377:0] ib_md.c:1488 UCX TRACE opening IB device mlx5_3 [1642057062.212639] [n-62-8-1:164377:0] ib_mlx5dv_md.c:607 UCX DEBUG mlx5dv_open_device(mlx5_3) failed: Bad file descriptor [1642057062.212645] [n-62-8-1:164377:0] ib_md.c:1548 UCX DEBUG mlx5_3: md open by 'uct_ib_mlx5_devx_md_ops' failed, trying next [1642057062.215130] [n-62-8-1:164377:0] ib_device.c:496 UCX DEBUG mlx5_3 vendor_id: 0x15b3 device_id: 4119 [1642057062.215135] [n-62-8-1:164377:0] ib_mlx5dv_md.c:816 UCX DEBUG checking for DC support on mlx5_3 [1642057062.216486] [n-62-8-1:164377:0] ib_mlx5dv_md.c:890 UCX DEBUG DC is supported on mlx5_3 [1642057062.217722] [n-62-8-1:164377:0] async.c:231 UCX DEBUG added async handler 0x34e87120 [id=4 ref 1] uct_ib_async_event_handler() to hash [1642057062.217806] [n-62-8-1:164377:0] async.c:504 UCX DEBUG listening to async event fd 4 events 0x1 mode thread_spinlock [1642057062.217810] [n-62-8-1:164377:0] ib_device.c:593 UCX DEBUG initialized device 'mlx5_3' (InfiniBand channel adapter) with 1 ports [1642057062.217886] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_3: cuda GPUDirect RDMA is disabled [1642057062.217895] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_3: rocm GPUDirect RDMA is disabled [1642057062.217903] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rcache_mp: align 8, maxelems 4294967295, elemsize 144 [1642057062.217983] [n-62-8-1:164377:0] ib_md.c:1257 UCX DEBUG mlx5_3: using registration cache [1642057062.218072] [n-62-8-1:164377:0] ib_md.c:1453 UCX TRACE mlx5_3: pcie 8x gen3, effective throughput 6961.862MB/s (54.390Gb/s) [1642057062.218075] [n-62-8-1:164377:0] ib_md.c:1541 UCX DEBUG mlx5_3: md open by 'uct_ib_mlx5_md_ops' is successful [1642057062.218082] [n-62-8-1:164377:0] ib_device.c:692 UCX TRACE mlx5_3:1 is not active (state: 1) [1642057062.218085] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_3:1 does not support flags 0x0: Destination is unreachable [1642057062.218087] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0x0 [1642057062.218090] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query rc_verbs resources: No such device [1642057062.218093] [n-62-8-1:164377:0] ib_device.c:692 UCX TRACE mlx5_3:1 is not active (state: 1) [1642057062.218095] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_3:1 does not support flags 0x4: Destination is unreachable [1642057062.218097] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0x4 [1642057062.218099] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query rc_mlx5 resources: No such device [1642057062.218102] [n-62-8-1:164377:0] ib_device.c:692 UCX TRACE mlx5_3:1 is not active (state: 1) [1642057062.218104] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_3:1 does not support flags 0xc4: Destination is unreachable [1642057062.218106] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0xc4 [1642057062.218108] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query dc_mlx5 resources: No such device [1642057062.218110] [n-62-8-1:164377:0] ib_device.c:692 UCX TRACE mlx5_3:1 is not active (state: 1) [1642057062.218112] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_3:1 does not support flags 0x0: Destination is unreachable [1642057062.218114] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0x0 [1642057062.218116] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query ud_verbs resources: No such device [1642057062.218118] [n-62-8-1:164377:0] ib_device.c:692 UCX TRACE mlx5_3:1 is not active (state: 1) [1642057062.218120] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_3:1 does not support flags 0x4: Destination is unreachable [1642057062.218122] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0x4 [1642057062.218123] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query ud_mlx5 resources: No such device [1642057062.218134] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_3: cuda GPUDirect RDMA is disabled [1642057062.218141] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_3: rocm GPUDirect RDMA is disabled # # Memory domain: mlx5_3 # Component: ib # register: unlimited, cost: 180 nsec # remote key: 8 bytes # local memory handle is required for zcopy # < no supported devices found > [1642057062.218154] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rcache_mp destroyed [1642057062.218208] [n-62-8-1:164377:0] ib_device.c:611 UCX DEBUG destroying ib device mlx5_3 [1642057062.218213] [n-62-8-1:164377:0] async.c:156 UCX DEBUG removed async handler 0x34e87120 [id=4 ref 1] uct_ib_async_event_handler() from hash [1642057062.218216] [n-62-8-1:164377:0] async.c:557 UCX DEBUG removing async handler 0x34e87120 [id=4 ref 1] uct_ib_async_event_handler() [1642057062.218256] [n-62-8-1:164377:0] async.c:577 UCX TRACE waiting for 0x34e87120 [id=4 ref 1] uct_ib_async_event_handler() completion (called=0) [1642057062.218259] [n-62-8-1:164377:0] async.c:171 UCX DEBUG release async handler 0x34e87120 [id=4 ref 0] uct_ib_async_event_handler() [1642057062.218453] [n-62-8-1:164377:0] ib_md.c:1488 UCX TRACE opening IB device mlx5_1 [1642057062.223296] [n-62-8-1:164377:0] ib_mlx5dv_md.c:607 UCX DEBUG mlx5dv_open_device(mlx5_1) failed: Bad file descriptor [1642057062.223301] [n-62-8-1:164377:0] ib_md.c:1548 UCX DEBUG mlx5_1: md open by 'uct_ib_mlx5_devx_md_ops' failed, trying next [1642057062.225207] [n-62-8-1:164377:0] ib_device.c:496 UCX DEBUG mlx5_1 vendor_id: 0x15b3 device_id: 4117 [1642057062.225211] [n-62-8-1:164377:0] ib_mlx5dv_md.c:816 UCX DEBUG checking for DC support on mlx5_1 [1642057062.226306] [n-62-8-1:164377:0] ib_mlx5dv_md.c:854 UCX DEBUG failed to create DCT on mlx5_1: Operation not supported [1642057062.227206] [n-62-8-1:164377:0] async.c:231 UCX DEBUG added async handler 0x34e87120 [id=4 ref 1] uct_ib_async_event_handler() to hash [1642057062.227288] [n-62-8-1:164377:0] async.c:504 UCX DEBUG listening to async event fd 4 events 0x1 mode thread_spinlock [1642057062.227293] [n-62-8-1:164377:0] ib_device.c:593 UCX DEBUG initialized device 'mlx5_1' (InfiniBand channel adapter) with 1 ports [1642057062.227376] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.227384] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled [1642057062.227392] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rcache_mp: align 8, maxelems 4294967295, elemsize 144 [1642057062.227467] [n-62-8-1:164377:0] ib_md.c:1257 UCX DEBUG mlx5_1: using registration cache [1642057062.227548] [n-62-8-1:164377:0] ib_md.c:1453 UCX TRACE mlx5_1: pcie 8x gen3, effective throughput 6961.862MB/s (54.390Gb/s) [1642057062.227551] [n-62-8-1:164377:0] ib_md.c:1541 UCX DEBUG mlx5_1: md open by 'uct_ib_mlx5_md_ops' is successful [1642057062.227598] [n-62-8-1:164377:0] topo.c:90 UCX DEBUG bus id 0xb0000 doesn't exist. sys_dev = 1 [1642057062.227602] [n-62-8-1:164377:0] ib_device.c:1058 UCX DEBUG mlx5_1 bus id 0:11:0.0 sys_dev 1 [1642057062.227639] [n-62-8-1:164377:0] topo.c:82 UCX DEBUG bus id 0xb0000 exists. sys_dev = 1 [1642057062.227642] [n-62-8-1:164377:0] ib_device.c:1058 UCX DEBUG mlx5_1 bus id 0:11:0.0 sys_dev 1 [1642057062.227646] [n-62-8-1:164377:0] ib_device.c:710 UCX TRACE mlx5_1:1 does not support DC [1642057062.227649] [n-62-8-1:164377:0] ib_device.c:1089 UCX TRACE mlx5_1:1 does not support flags 0xc4: Unsupported operation [1642057062.227651] [n-62-8-1:164377:0] ib_device.c:1103 UCX DEBUG no compatible IB ports found for flags 0xc4 [1642057062.227653] [n-62-8-1:164377:0] uct_md.c:85 UCX DEBUG failed to query dc_mlx5 resources: No such device [1642057062.227686] [n-62-8-1:164377:0] topo.c:82 UCX DEBUG bus id 0xb0000 exists. sys_dev = 1 [1642057062.227689] [n-62-8-1:164377:0] ib_device.c:1058 UCX DEBUG mlx5_1 bus id 0:11:0.0 sys_dev 1 [1642057062.227722] [n-62-8-1:164377:0] topo.c:82 UCX DEBUG bus id 0xb0000 exists. sys_dev = 1 [1642057062.227724] [n-62-8-1:164377:0] ib_device.c:1058 UCX DEBUG mlx5_1 bus id 0:11:0.0 sys_dev 1 [1642057062.227734] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.227741] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled # # Memory domain: mlx5_1 # Component: ib # register: unlimited, cost: 180 nsec # remote key: 8 bytes # local memory handle is required for zcopy # [1642057062.227760] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.227767] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled # Transport: rc_verbs # Device: mlx5_1:1 # System device: 0000:0b:00.0 (1) [1642057062.227849] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.227856] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled [1642057062.227960] [n-62-8-1:164377:0] ib_iface.c:795 UCX DEBUG using pkey[0] 0xffff on mlx5_1:1 [1642057062.228283] [n-62-8-1:164377:0] ib_device.c:926 UCX DEBUG mlx5_1:1 using gid_index 5 [1642057062.228382] [n-62-8-1:164377:0] sock.c:128 UCX DIAG failed to read from /sys/class/net/enp11s0f0/bonding/ad_num_ports: No such file or directory, assuming 802.3ad bonding is disabled [1642057062.228387] [n-62-8-1:164377:0] ib_device.c:1373 UCX DEBUG RoCE LAG level on mlx5_1:1 (enp11s0f0) is 1 [1642057062.229184] [n-62-8-1:164377:0] ib_iface.c:1346 UCX DEBUG created uct_ib_iface_t headroom_ofs 12 payload_ofs 12 hdr_ofs 11 data_sz 8256 [1642057062.229194] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_recv_desc: align 64, maxelems 4294967295, elemsize 8276 [1642057062.229198] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_send_desc: align 64, maxelems 4294967295, elemsize 8320 [1642057062.229224] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool send-ops-mpool: align 64, maxelems 4294967295, elemsize 48 [1642057062.230028] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool pending-ops: align 1, maxelems 4294967295, elemsize 64 [1642057062.230033] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_verbs_short_desc: align 64, maxelems 4294967295, elemsize 192 [1642057062.230380] [n-62-8-1:164377:0] ib_iface.c:938 UCX DEBUG iface=0x34eda7c0: created RC QP 0x13e on mlx5_1:1 TX wr:409 sge:3 inl:124 resp:64 RX wr:0 sge:0 resp:64 # # capabilities: # bandwidth: 219.16/ppn + 0.00 MB/sec # latency: 5200 + 1.000 * N nsec # overhead: 75 nsec # put_short: <= 124 # put_bcopy: <= 8256 # put_zcopy: <= 1G, up to 3 iov # put_opt_zcopy_align: <= 512 # put_align_mtu: <= 1K # get_bcopy: <= 8256 # get_zcopy: 65..1G, up to 3 iov # get_opt_zcopy_align: <= 512 # get_align_mtu: <= 1K # am_short: <= 123 # am_bcopy: <= 8255 # am_zcopy: <= 8255, up to 2 iov # am_opt_zcopy_align: <= 512 # am_align_mtu: <= 1K # am header: <= 127 # domain: device # atomic_add: 64 bit # atomic_fadd: 64 bit # atomic_cswap: 64 bit # connection: to ep # device priority: 28 # device num paths: 1 # max eps: 256 # device address: 18 bytes # ep address: 16 bytes # error handling: peer failure [1642057062.230780] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_verbs_short_desc destroyed [1642057062.231462] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool send-ops-mpool destroyed [1642057062.231466] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_send_desc destroyed [1642057062.231468] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_recv_desc destroyed [1642057062.231470] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool pending-ops destroyed # # [1642057062.232010] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.232019] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled # Transport: rc_mlx5 # Device: mlx5_1:1 # System device: 0000:0b:00.0 (1) [1642057062.232109] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.232116] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled [1642057062.232209] [n-62-8-1:164377:0] ib_iface.c:795 UCX DEBUG using pkey[0] 0xffff on mlx5_1:1 [1642057062.232479] [n-62-8-1:164377:0] ib_device.c:926 UCX DEBUG mlx5_1:1 using gid_index 5 [1642057062.232559] [n-62-8-1:164377:0] sock.c:128 UCX DIAG failed to read from /sys/class/net/enp11s0f0/bonding/ad_num_ports: No such file or directory, assuming 802.3ad bonding is disabled [1642057062.232563] [n-62-8-1:164377:0] ib_device.c:1373 UCX DEBUG RoCE LAG level on mlx5_1:1 (enp11s0f0) is 1 [1642057062.233434] [n-62-8-1:164377:0] ib_iface.c:1346 UCX DEBUG created uct_ib_iface_t headroom_ofs 12 payload_ofs 12 hdr_ofs 10 data_sz 8256 [1642057062.233470] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_recv_desc: align 64, maxelems 4294967295, elemsize 8276 [1642057062.233474] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_send_desc: align 64, maxelems 4294967295, elemsize 8320 [1642057062.233498] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool send-ops-mpool: align 64, maxelems 4294967295, elemsize 48 [1642057062.234433] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool pending-ops: align 1, maxelems 4294967295, elemsize 64 [1642057062.234446] [n-62-8-1:164377:0] ib_mlx5.c:825 UCX DEBUG SL=0 (AR support - no) was selected on mlx5_1:1, SLs with AR support = { }, SLs without AR support = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } [1642057062.234518] [n-62-8-1:164377:0] rc_mlx5_common.c:681 UCX DEBUG ibv_alloc_dm(dev=mlx5_1 length=2048) failed: Invalid argument [1642057062.234523] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool rc_mlx5_atomic_desc: align 64, maxelems 4294967295, elemsize 72 # # capabilities: # bandwidth: 219.16/ppn + 0.00 MB/sec # latency: 5200 + 1.000 * N nsec # overhead: 40 nsec # put_short: <= 220 # put_bcopy: <= 8256 # put_zcopy: <= 1G, up to 14 iov # put_opt_zcopy_align: <= 512 # put_align_mtu: <= 1K # get_bcopy: <= 8256 # get_zcopy: 65..1G, up to 14 iov # get_opt_zcopy_align: <= 512 # get_align_mtu: <= 1K # am_short: <= 234 # am_bcopy: <= 8254 # am_zcopy: <= 8254, up to 3 iov # am_opt_zcopy_align: <= 512 # am_align_mtu: <= 1K # am header: <= 186 # domain: device # atomic_add: 64 bit # atomic_fadd: 64 bit # atomic_cswap: 64 bit # connection: to ep # device priority: 28 # device num paths: 1 # max eps: 256 # device address: 18 bytes # ep address: 7 bytes # error handling: buffer (zcopy), remote access, peer failure [1642057062.234568] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_mlx5_atomic_desc destroyed [1642057062.235322] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool send-ops-mpool destroyed [1642057062.235326] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_send_desc destroyed [1642057062.235328] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rc_recv_desc destroyed [1642057062.235330] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool pending-ops destroyed # # [1642057062.235867] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.235875] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled # Transport: ud_verbs # Device: mlx5_1:1 # System device: 0000:0b:00.0 (1) [1642057062.235933] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.235940] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled [1642057062.236046] [n-62-8-1:164377:0] ib_iface.c:795 UCX DEBUG using pkey[0] 0xffff on mlx5_1:1 [1642057062.236315] [n-62-8-1:164377:0] ib_device.c:926 UCX DEBUG mlx5_1:1 using gid_index 5 [1642057062.236393] [n-62-8-1:164377:0] sock.c:128 UCX DIAG failed to read from /sys/class/net/enp11s0f0/bonding/ad_num_ports: No such file or directory, assuming 802.3ad bonding is disabled [1642057062.236397] [n-62-8-1:164377:0] ib_device.c:1373 UCX DEBUG RoCE LAG level on mlx5_1:1 (enp11s0f0) is 1 [1642057062.236996] [n-62-8-1:164377:0] ib_iface.c:1346 UCX DEBUG created uct_ib_iface_t headroom_ofs 88 payload_ofs 88 hdr_ofs 40 data_sz 1024 [1642057062.237414] [n-62-8-1:164377:0] ib_iface.c:938 UCX DEBUG iface=0x34eda7c0: created UD QP 0x13f on mlx5_1:1 TX wr:341 sge:2 inl:124 resp:0 RX wr:4096 sge:1 resp:0 [1642057062.237750] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool ud_recv_skb: align 64, maxelems 4294967295, elemsize 1120 [1642057062.237754] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method huge [1642057062.237758] [n-62-8-1:164377:0] uct_mem.c:275 UCX TRACE failed to allocate 147568 bytes from hugetlb: User-defined limit was reached [1642057062.237760] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method thp [1642057062.237782] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method md [1642057062.237793] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.237800] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled [1642057062.237804] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method mmap [1642057062.237813] [n-62-8-1:164377:0] uct_mem.c:294 UCX TRACE allocated 196608 bytes at 0xffff8a750000 using mmap [1642057062.237820] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.237827] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled [1642057062.237847] [n-62-8-1:164377:0] mpool.c:206 UCX DEBUG mpool rcache_mp: allocated chunk 0xffff8a720008 of 196600 bytes with 1365 elements [1642057062.237929] [n-62-8-1:164377:0] ib_md.c:779 UCX DEBUG registered memory 0xffff8a750000..0xffff8a780000 on mlx5_1 lkey 0x12e1e rkey 0x12e1e access 0xf flags 0x3e4 [1642057062.237936] [n-62-8-1:164377:0] rcache.c:764 UCX TRACE mlx5_1: created region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] gt rw ref 2 lkey 0x12e1e rkey 0x12e1e atomic_rkey 0xffffffff [1642057062.237939] [n-62-8-1:164377:0] mpool.c:206 UCX DEBUG mpool ud_recv_skb: allocated chunk 0xffff8a750018 of 196584 bytes with 170 elements [1642057062.237945] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool ud_tx_skb: align 64, maxelems 4294967295, elemsize 1096 [1642057062.238001] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80::609:73ff:feb8:3c88 to hash on device mlx5_1 port 1 index 0) [1642057062.238045] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80::609:73ff:feb8:3c88 to hash on device mlx5_1 port 1 index 1) [1642057062.238087] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80::c88f:606f:4af6:fe54 to hash on device mlx5_1 port 1 index 2) [1642057062.238127] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80::c88f:606f:4af6:fe54 to hash on device mlx5_1 port 1 index 3) [1642057062.238168] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid ::ffff:10.66.8.1 to hash on device mlx5_1 port 1 index 4) [1642057062.238208] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid ::ffff:10.66.8.1 to hash on device mlx5_1 port 1 index 5) [1642057062.247014] [n-62-8-1:164377:0] timer_wheel.c:41 UCX DEBUG high res timer created log=19 resolution=2621.440000 usec wanted: 2500.000000 usec # # capabilities: # bandwidth: 219.16/ppn + 0.00 MB/sec # latency: 5230 nsec # overhead: 105 nsec # am_short: <= 116 # am_bcopy: <= 1016 # am_zcopy: <= 1016, up to 1 iov # am_opt_zcopy_align: <= 512 # am_align_mtu: <= 1K # am header: <= 880 # connection: to ep, to iface # device priority: 28 # device num paths: 1 # max eps: inf # device address: 18 bytes # iface address: 3 bytes # ep address: 6 bytes # error handling: peer failure [1642057062.247046] [n-62-8-1:164377:0] ud_iface.c:600 UCX DEBUG iface(0x34eda7c0): cep cleanup [1642057062.247049] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool ud_tx_skb destroyed [1642057062.247056] [n-62-8-1:164377:0] rcache.c:323 UCX TRACE mlx5_1: put region, flags 0x1 region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] gt rw ref 2 lkey 0x12e1e rkey 0x12e1e atomic_rkey 0xffffffff [1642057062.247075] [n-62-8-1:164377:0] rcache.c:359 UCX TRACE mlx5_1: invalidate region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] gt rw ref 1 lkey 0x12e1e rkey 0x12e1e atomic_rkey 0xffffffff [1642057062.247080] [n-62-8-1:164377:0] rcache.c:323 UCX TRACE mlx5_1: put region, flags 0xa region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] g- rw ref 1 lkey 0x12e1e rkey 0x12e1e atomic_rkey 0xffffffff [1642057062.247084] [n-62-8-1:164377:0] rcache.c:334 UCX TRACE mlx5_1: put on GC list region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] g- rw ref 0 lkey 0x12e1e rkey 0x12e1e atomic_rkey 0xffffffff [1642057062.247097] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool ud_recv_skb destroyed [1642057062.247949] [n-62-8-1:164377:0] ud_iface.c:607 UCX DEBUG iface(0x34eda7c0): ptr_array cleanup # # [1642057062.248360] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.248368] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled # Transport: ud_mlx5 # Device: mlx5_1:1 # System device: 0000:0b:00.0 (1) [1642057062.248432] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.248440] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled [1642057062.248533] [n-62-8-1:164377:0] ib_iface.c:795 UCX DEBUG using pkey[0] 0xffff on mlx5_1:1 [1642057062.248800] [n-62-8-1:164377:0] ib_device.c:926 UCX DEBUG mlx5_1:1 using gid_index 5 [1642057062.248880] [n-62-8-1:164377:0] sock.c:128 UCX DIAG failed to read from /sys/class/net/enp11s0f0/bonding/ad_num_ports: No such file or directory, assuming 802.3ad bonding is disabled [1642057062.248884] [n-62-8-1:164377:0] ib_device.c:1373 UCX DEBUG RoCE LAG level on mlx5_1:1 (enp11s0f0) is 1 [1642057062.249524] [n-62-8-1:164377:0] ib_iface.c:1346 UCX DEBUG created uct_ib_iface_t headroom_ofs 88 payload_ofs 88 hdr_ofs 40 data_sz 1024 [1642057062.249804] [n-62-8-1:164377:0] ib_iface.c:938 UCX DEBUG iface=0x34eda7c0: created UD QP 0x140 on mlx5_1:1 TX wr:341 sge:2 inl:124 resp:0 RX wr:4096 sge:1 resp:0 [1642057062.250100] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool ud_recv_skb: align 64, maxelems 4294967295, elemsize 1120 [1642057062.250104] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method huge [1642057062.250108] [n-62-8-1:164377:0] uct_mem.c:275 UCX TRACE failed to allocate 147568 bytes from hugetlb: User-defined limit was reached [1642057062.250110] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method thp [1642057062.250132] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method md [1642057062.250142] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.250149] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled [1642057062.250154] [n-62-8-1:164377:0] uct_mem.c:106 UCX TRACE trying allocation method mmap [1642057062.250163] [n-62-8-1:164377:0] uct_mem.c:294 UCX TRACE allocated 196608 bytes at 0xffff8a750000 using mmap [1642057062.250170] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: cuda GPUDirect RDMA is disabled [1642057062.250178] [n-62-8-1:164377:0] ib_md.c:269 UCX DEBUG mlx5_1: rocm GPUDirect RDMA is disabled [1642057062.250187] [n-62-8-1:164377:0] rcache.c:295 UCX TRACE mlx5_1: destroy region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] g- rw ref 0 lkey 0x12e1e rkey 0x12e1e atomic_rkey 0xffffffff [1642057062.250273] [n-62-8-1:164377:0] ib_md.c:779 UCX DEBUG registered memory 0xffff8a750000..0xffff8a780000 on mlx5_1 lkey 0x13d2d rkey 0x13d2d access 0xf flags 0x3e4 [1642057062.250279] [n-62-8-1:164377:0] rcache.c:764 UCX TRACE mlx5_1: created region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] gt rw ref 2 lkey 0x13d2d rkey 0x13d2d atomic_rkey 0xffffffff [1642057062.250282] [n-62-8-1:164377:0] mpool.c:206 UCX DEBUG mpool ud_recv_skb: allocated chunk 0xffff8a750018 of 196584 bytes with 170 elements [1642057062.250289] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool ud_tx_skb: align 64, maxelems 4294967295, elemsize 1096 [1642057062.250345] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80::609:73ff:feb8:3c88 to hash on device mlx5_1 port 1 index 0) [1642057062.250389] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80::609:73ff:feb8:3c88 to hash on device mlx5_1 port 1 index 1) [1642057062.250430] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80::c88f:606f:4af6:fe54 to hash on device mlx5_1 port 1 index 2) [1642057062.250470] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid fe80::c88f:606f:4af6:fe54 to hash on device mlx5_1 port 1 index 3) [1642057062.250511] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid ::ffff:10.66.8.1 to hash on device mlx5_1 port 1 index 4) [1642057062.250550] [n-62-8-1:164377:0] ud_iface.c:387 UCX DEBUG iface 0x34eda7c0: adding gid ::ffff:10.66.8.1 to hash on device mlx5_1 port 1 index 5) [1642057062.258734] [n-62-8-1:164377:0] ib_mlx5.c:825 UCX DEBUG SL=0 (AR support - no) was selected on mlx5_1:1, SLs with AR support = { }, SLs without AR support = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } [1642057062.258789] [n-62-8-1:164377:0] ib_mlx5.c:547 UCX DEBUG tx wq 65536 bytes [bb=64, nwqe=1024] mmio_mode db [1642057062.258819] [n-62-8-1:164377:0] timer_wheel.c:41 UCX DEBUG high res timer created log=19 resolution=2621.440000 usec wanted: 2500.000000 usec # # capabilities: # bandwidth: 219.16/ppn + 0.00 MB/sec # latency: 5230 nsec # overhead: 80 nsec # am_short: <= 180 # am_bcopy: <= 1016 # am_zcopy: <= 1016, up to 3 iov # am_opt_zcopy_align: <= 512 # am_align_mtu: <= 1K # am header: <= 132 # connection: to ep, to iface # device priority: 28 # device num paths: 1 # max eps: inf # device address: 18 bytes # iface address: 3 bytes # ep address: 6 bytes # error handling: peer failure [1642057062.258852] [n-62-8-1:164377:0] ud_iface.c:600 UCX DEBUG iface(0x34eda7c0): cep cleanup [1642057062.258855] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool ud_tx_skb destroyed [1642057062.258861] [n-62-8-1:164377:0] rcache.c:323 UCX TRACE mlx5_1: put region, flags 0x1 region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] gt rw ref 2 lkey 0x13d2d rkey 0x13d2d atomic_rkey 0xffffffff [1642057062.258867] [n-62-8-1:164377:0] rcache.c:359 UCX TRACE mlx5_1: invalidate region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] gt rw ref 1 lkey 0x13d2d rkey 0x13d2d atomic_rkey 0xffffffff [1642057062.258871] [n-62-8-1:164377:0] rcache.c:323 UCX TRACE mlx5_1: put region, flags 0xa region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] g- rw ref 1 lkey 0x13d2d rkey 0x13d2d atomic_rkey 0xffffffff [1642057062.258875] [n-62-8-1:164377:0] rcache.c:334 UCX TRACE mlx5_1: put on GC list region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] g- rw ref 0 lkey 0x13d2d rkey 0x13d2d atomic_rkey 0xffffffff [1642057062.258887] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool ud_recv_skb destroyed [1642057062.259666] [n-62-8-1:164377:0] ud_iface.c:607 UCX DEBUG iface(0x34eda7c0): ptr_array cleanup # [1642057062.260076] [n-62-8-1:164377:0] rcache.c:295 UCX TRACE mlx5_1: destroy region 0x34edf0c0 [0xffff8a750000..0xffff8a780000] g- rw ref 0 lkey 0x13d2d rkey 0x13d2d atomic_rkey 0xffffffff [1642057062.260128] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool rcache_mp destroyed [1642057062.260196] [n-62-8-1:164377:0] ib_device.c:611 UCX DEBUG destroying ib device mlx5_1 [1642057062.260213] [n-62-8-1:164377:0] async.c:156 UCX DEBUG removed async handler 0x34e87120 [id=4 ref 1] uct_ib_async_event_handler() from hash [1642057062.260217] [n-62-8-1:164377:0] async.c:557 UCX DEBUG removing async handler 0x34e87120 [id=4 ref 1] uct_ib_async_event_handler() [1642057062.260259] [n-62-8-1:164377:0] async.c:577 UCX TRACE waiting for 0x34e87120 [id=4 ref 1] uct_ib_async_event_handler() completion (called=0) [1642057062.260262] [n-62-8-1:164377:0] async.c:171 UCX DEBUG release async handler 0x34e87120 [id=4 ref 0] uct_ib_async_event_handler() # # Memory domain: rdmacm # Component: rdmacm # supports client-server connection establishment via sockaddr # < no supported devices found > [1642057062.261315] [n-62-8-1:164377:0] cma_md.c:92 UCX TRACE ptrace_scope is 0, CMA is supported [1642057062.261332] [n-62-8-1:164377:0] cma_md.c:92 UCX TRACE ptrace_scope is 0, CMA is supported # # Memory domain: cma # Component: cma # register: unlimited, cost: 9 nsec # # Transport: cma # Device: memory # System device: [1642057062.261388] [n-62-8-1:164377:0] mpool.c:89 UCX DEBUG mpool uct_scopy_iface_tx_mp: align 64, maxelems 4294967295, elemsize 736 # # capabilities: # bandwidth: 0.00/ppn + 11145.00 MB/sec # latency: 80 nsec # overhead: 400 nsec # put_zcopy: unlimited, up to 16 iov # put_opt_zcopy_align: <= 1 # put_align_mtu: <= 1 # get_zcopy: unlimited, up to 16 iov # get_opt_zcopy_align: <= 1 # get_align_mtu: <= 1 # connection: to iface # device priority: 0 # device num paths: 1 # max eps: inf # device address: 8 bytes # iface address: 4 bytes # error handling: none [1642057062.261411] [n-62-8-1:164377:0] mpool.c:143 UCX DEBUG mpool uct_scopy_iface_tx_mp destroyed #