You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Description
Running the following tests after setting FI_OFI_RXM_USE_SRX=true results in an assertion failure for verbs;ofi_rxm:
fi_cq_data -e rdm -o writedata -p "verbs;ofi_rxm"
fi_rma_bw -e rdm -o writedata -p "verbs;ofi_rxm"
fi_rma_bw -e rdm -o writedata -u -p "verbs;ofi_rxm"
fi_multinode_coll -n 3 -p "verbs;ofi_rxm"
To Reproduce
Steps to reproduce the behavior:
Set the FI_OFI_RXM_USE_SRX=true environment variable and run the following tests:
server: fi_rma_bw -e rdm -o writedata -p "verbs;ofi_rxm" -s <server_ip>
client: fi_rma_bw -e rdm -o writedata -p "verbs;ofi_rxm" -s <client_ip> <server_ip>
Output
Backtrace from gdb:
(gdb) bt
#0 0x00007ffff56fbcbb in raise () from /lib64/libc.so.6
#1 0x00007ffff56fd355 in abort () from /lib64/libc.so.6
#2 0x00007ffff56f3cba in __assert_fail_base () from /lib64/libc.so.6
#3 0x00007ffff56f3d42 in __assert_fail () from /lib64/libc.so.6
#4 0x00007ffff77e3e20 in vrb_alloc_ctx (progress=0x4341c0)
at prov/verbs/src/verbs_ofi.h:983
#5 0x00007ffff77e9760 in vrb_post_srq (srx=0xf4b810,
wr=0x7fffffffd180) at prov/verbs/src/verbs_ep.c:1536
#6 0x00007ffff77e9a82 in vrb_srx_recv (ep_fid=0xf4b810,
buf=0x7ffff0d80258, len=16448, desc=0x444610,
src_addr=18446744073709551615, context=0x7ffff0d80170)
at prov/verbs/src/verbs_ep.c:1582
#7 0x00007ffff780fb92 in fi_recv (ep=0xf4b810, buf=0x7ffff0d80258,
len=16448, desc=0x444610, src_addr=18446744073709551615,
context=0x7ffff0d80170) at ./include/rdma/fi_endpoint.h:297
#8 0x00007ffff7819363 in rxm_post_recv (rx_buf=0x7ffff0d80170)
at prov/rxm/src/rxm_cq.c:1949
#9 0x00007ffff781946d in rxm_prepost_recv (ep=0x434580,
--Type <RET> for more, q to quit, c to continue without paging--
rx_ep=0xf4b810) at prov/rxm/src/rxm_cq.c:1973
#10 0x00007ffff7806264 in rxm_ep_ctrl (fid=0x434580, command=6,
arg=0x0) at prov/rxm/src/rxm_ep.c:1611
#11 0x00000000004040d1 in fi_enable (ep=0x434580)
at /home/jdesai/include/rdma/fi_endpoint.h:217
#12 ft_enable_ep (bind_ep=0x434580, bind_eq=<optimized out>,
bind_av=<optimized out>, bind_txcq=<optimized out>,
bind_rxcq=0xf4ba90, bind_txcntr=0x0, bind_rxcntr=0x0,
bind_rma_cntr=0x0) at common/shared.c:1335
#13 0x000000000040adfa in ft_enable_ep_recv ()
at common/shared.c:1348
#14 0x000000000040bf5c in ft_init_fabric () at common/shared.c:1241
#15 0x0000000000401bed in run () at benchmarks/rma_bw.c:52
#16 main (argc=9, argv=0x7fffffffd438) at benchmarks/rma_bw.c:140
The text was updated successfully, but these errors were encountered:
Description
Running the following tests after setting FI_OFI_RXM_USE_SRX=true results in an assertion failure for verbs;ofi_rxm:
fi_cq_data -e rdm -o writedata -p "verbs;ofi_rxm"
fi_rma_bw -e rdm -o writedata -p "verbs;ofi_rxm"
fi_rma_bw -e rdm -o writedata -u -p "verbs;ofi_rxm"
fi_multinode_coll -n 3 -p "verbs;ofi_rxm"
To Reproduce
Steps to reproduce the behavior:
Set the FI_OFI_RXM_USE_SRX=true environment variable and run the following tests:
server: fi_rma_bw -e rdm -o writedata -p "verbs;ofi_rxm" -s <server_ip>
client: fi_rma_bw -e rdm -o writedata -p "verbs;ofi_rxm" -s <client_ip> <server_ip>
Output
Backtrace from gdb:
(gdb) bt
#0 0x00007ffff56fbcbb in raise () from /lib64/libc.so.6
#1 0x00007ffff56fd355 in abort () from /lib64/libc.so.6
#2 0x00007ffff56f3cba in __assert_fail_base () from /lib64/libc.so.6
#3 0x00007ffff56f3d42 in __assert_fail () from /lib64/libc.so.6
#4 0x00007ffff77e3e20 in vrb_alloc_ctx (progress=0x4341c0)
at prov/verbs/src/verbs_ofi.h:983
#5 0x00007ffff77e9760 in vrb_post_srq (srx=0xf4b810,
wr=0x7fffffffd180) at prov/verbs/src/verbs_ep.c:1536
#6 0x00007ffff77e9a82 in vrb_srx_recv (ep_fid=0xf4b810,
buf=0x7ffff0d80258, len=16448, desc=0x444610,
src_addr=18446744073709551615, context=0x7ffff0d80170)
at prov/verbs/src/verbs_ep.c:1582
#7 0x00007ffff780fb92 in fi_recv (ep=0xf4b810, buf=0x7ffff0d80258,
len=16448, desc=0x444610, src_addr=18446744073709551615,
context=0x7ffff0d80170) at ./include/rdma/fi_endpoint.h:297
#8 0x00007ffff7819363 in rxm_post_recv (rx_buf=0x7ffff0d80170)
at prov/rxm/src/rxm_cq.c:1949
#9 0x00007ffff781946d in rxm_prepost_recv (ep=0x434580,
--Type <RET> for more, q to quit, c to continue without paging--
rx_ep=0xf4b810) at prov/rxm/src/rxm_cq.c:1973
#10 0x00007ffff7806264 in rxm_ep_ctrl (fid=0x434580, command=6,
arg=0x0) at prov/rxm/src/rxm_ep.c:1611
#11 0x00000000004040d1 in fi_enable (ep=0x434580)
at /home/jdesai/include/rdma/fi_endpoint.h:217
#12 ft_enable_ep (bind_ep=0x434580, bind_eq=<optimized out>,
bind_av=<optimized out>, bind_txcq=<optimized out>,
bind_rxcq=0xf4ba90, bind_txcntr=0x0, bind_rxcntr=0x0,
bind_rma_cntr=0x0) at common/shared.c:1335
#13 0x000000000040adfa in ft_enable_ep_recv ()
at common/shared.c:1348
#14 0x000000000040bf5c in ft_init_fabric () at common/shared.c:1241
#15 0x0000000000401bed in run () at benchmarks/rma_bw.c:52
#16 main (argc=9, argv=0x7fffffffd438) at benchmarks/rma_bw.c:140
The text was updated successfully, but these errors were encountered: