-
Notifications
You must be signed in to change notification settings - Fork 143
Description
Environment: qemu-8.1.1; host OS: CentOS 7.2; guest OS: CentOS 8 with Linux kernel 6.5; ndctl v78
execute on the host:
modprobe brd rd_nr=1 rd_size=16777216 max_part=0
mke2fs /dev/ram0
mount /dev/ram0 /tmp/pmem0
dd if=/dev/zero of=/tmp/pmem0/cxltest.raw bs=1M count=512
dd if=/dev/zero of=/tmp/pmem0/lsa.raw bs=1M count=256
Start qemu:
qemu-system-x86_64 /root/CentOS-Stream-GenericCloud-8-latest.x86_64.qcow2 \
-smp 4 \
-m 4G \
-net nic \
-net tap,ifname=tap1,script=/etc/qemu-ifup,downscript=no \
-vnc :0 \
-daemonize \
-enable-kvm \
-machine type=q35,cxl=on \
-object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/pmem0/cxltest.raw,size=512M \
-object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/pmem0/lsa.raw,size=256M \
-device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
-device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \
-device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \
-M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G
execute on the virtual machine:
[root@localhost ~]# cxl create-region -d decoder0.0 -t pmem -m mem0
{
"region":"region0",
"resource":"0x190000000",
"size":"512.00 MiB (536.87 MB)",
"type":"pmem",
"interleave_ways":1,
"interleave_granularity":256,
"decode_state":"commit",
"mappings":[
{
"position":0,
"memdev":"mem0",
"decoder":"decoder2.0"
}
]
}
cxl region: cmd_create_region: created 1 region
[root@localhost ~]# ndctl create-namespace
{
"dev":"namespace0.0",
"mode":"fsdax",
"map":"dev",
"size":"502.00 MiB (526.39 MB)",
"uuid":"53f3a16b-39d3-43de-95bf-ceb3d67f6d08",
"sector_size":512,
"align":2097152,
"blockdev":"pmem0"
}
[root@localhost ~]# fio -output=/root/fio_result.txt -name=100S100W -filename=/dev/pmem0 -ioengine=libaio -direct=1 -blocksize=4K -size=128M -rw=write -iodepth=8 -numjobs=1
[root@localhost ~]# ][100.0%][w=319KiB/s][w=79 IOPS][eta 00m:00s]
fio uses 100% of CPU, perf result:
- 48.98% 0.00% fio [kernel.kallsyms] [k] entry_SYSCALL_64_after_hwframe
- 48.98% 0.00% fio [kernel.kallsyms] [k] do_syscall_64
- 48.97% 0.00% fio libc-2.28.so [.] syscall
- 48.97% 0.00% fio [kernel.kallsyms] [k] __x64_sys_io_submit
- 48.97% 0.00% fio [kernel.kallsyms] [k] io_submit_one
- 48.97% 0.00% fio [kernel.kallsyms] [k] aio_write
- 48.97% 0.00% fio [kernel.kallsyms] [k] blkdev_write_iter
- 48.97% 0.00% fio [kernel.kallsyms] [k] __generic_file_write_iter
- 48.97% 0.00% fio [kernel.kallsyms] [k] generic_file_direct_write
- 48.97% 0.00% fio [kernel.kallsyms] [k] pmem_do_write
- 48.97% 0.00% fio [kernel.kallsyms] [k] blkdev_direct_IO.part.22
- 48.97% 0.00% fio [kernel.kallsyms] [k] submit_bio_noacct_nocheck
- 48.97% 0.00% fio [kernel.kallsyms] [k] __submit_bio
- 48.97% 0.00% fio [kernel.kallsyms] [k] pmem_submit_bio
- 48.97% 48.95% fio [kernel.kallsyms] [k] __memcpy_flushcache
- 0.54% 0xa00000003
syscall
entry_SYSCALL_64_after_hwframe
do_syscall_64
__x64_sys_io_submit
io_submit_one
aio_write
blkdev_write_iter
__generic_file_write_iter
generic_file_direct_write
blkdev_direct_IO.part.22
submit_bio_noacct_nocheck
__submit_bio
pmem_submit_bio
pmem_do_write
__memcpy_flushcache
- 0.54% 0xe00000007
syscall
entry_SYSCALL_64_after_hwframe
do_syscall_64
__x64_sys_io_submit
io_submit_one
aio_write
blkdev_write_iter
__generic_file_write_iter
generic_file_direct_write
blkdev_direct_IO.part.22
submit_bio_noacct_nocheck
__submit_bio
pmem_submit_bio
pmem_do_write
__memcpy_flushcache
Why is __memcpy_flushcache so slow?