#!/usr/bin/env python3
"""Benchmark CPU and GPU (OpenCV CUDA) frame operations in a DeepStream pipeline.

Usage:
    ./benchmark.py <benchmark-name> <cpu|gpu> [n-frames] [output-filename]

The script builds a ``videotestsrc``-driven GStreamer pipeline, attaches a
buffer probe to the sink pad of an ``identity`` element and times the selected
benchmark function on every frame that passes through the probe.  Summary
statistics are printed, appended to ``metrics.csv`` and the raw per-frame
timings are written to ``measurements-<benchmark-name>-<device>.txt``.
"""
import platform
import random
import statistics
import sys
import time
from dataclasses import dataclass
from typing import List, Dict, Callable, Tuple, Optional

import numpy as np

from savant.deepstream.opencv_utils import (
    nvds_to_gpu_mat,
    alpha_comp,
    draw_rect,
    apply_cuda_filter,
)
from savant.deepstream.utils import nvds_frame_meta_iterator, get_nvds_buf_surface

# NOTE(review): this runs after the savant imports above, so it cannot help
# resolve them; kept in its original position to preserve import behaviour.
sys.path.append('../../')

import cv2
import gi

gi.require_version('Gst', '1.0')
from gi.repository import GLib, Gst
import pyds

USAGE = 'Usage: ./benchmark.py <benchmark-name> <cpu|gpu> [n-frames] [output-filename]'

# Multiplier applied to elapsed seconds: measurements are stored in microseconds.
scale = 10**6  # microseconds
RECT_COLOR = (127, 127, 127, 255)  # gray
RECT_N = 20  # number of random points generated for every buffer
RECT_WIDTH = 100
RECT_HEIGHT = 100
FACE_WIDTH = 30
FACE_HEIGHT = 40

# Frame size configured on nvstreammux and in the caps filters.
FRAME_WIDTH = 1920
FRAME_HEIGHT = 1080
# Upper bounds used when drawing random top-left corners; they are slightly
# smaller than the frame size and are kept exactly as in the original script.
POINT_AREA_WIDTH = 1900
POINT_AREA_HEIGHT = 1000

# Gaussian blur parameters shared by the CPU and the CUDA blur benchmarks.
BLUR_KERNEL_SIZE = (31, 31)
BLUR_SIGMA = 100

# (CSV label, ``n`` passed to statistics.quantiles); the last cut point of an
# n-quantile split is the (1 - 1/n) percentile.
_PERCENTILES = (('80%', 5), ('90%', 10), ('95%', 20), ('99%', 100))


@dataclass
class BenchmarkData:
    """Inputs shared by all benchmark functions."""

    # Overlay image as loaded by cv2.imread (host memory).
    overlay: np.ndarray
    # The same overlay wrapped in a cv2.cuda.GpuMat once, at start-up.
    overlay_mat: cv2.cuda.GpuMat
    # (x, y) points regenerated by pad_buffer_probe for every buffer.
    points: List[Tuple[int, int]]
    # CUDA Gaussian filter created once in main.
    cuda_blur_filter: cv2.cuda.Filter


def benchmark_cpu_overlay(
    gst_buffer: Gst.Buffer,
    nvds_frame_meta: pyds.NvDsFrameMeta,
    data: BenchmarkData,
):
    """Copy ``data.overlay`` into the top-left corner of the frame on the CPU."""
    with get_nvds_buf_surface(gst_buffer, nvds_frame_meta) as np_frame:
        height, width, _ = data.overlay.shape
        np_frame[:height, :width] = data.overlay


def benchmark_gpu_overlay(
    gst_buffer: Gst.Buffer,
    nvds_frame_meta: pyds.NvDsFrameMeta,
    data: BenchmarkData,
):
    """Alpha-compose the host-side overlay onto the frame at (0, 0).

    The numpy overlay is handed to ``alpha_comp`` on every call; presumably it
    is uploaded to the GPU each time -- confirm against savant's alpha_comp.
    """
    with nvds_to_gpu_mat(gst_buffer, nvds_frame_meta) as frame_mat:
        alpha_comp(frame_mat, data.overlay, (0, 0))


def benchmark_gpu_overlay_single(
    gst_buffer: Gst.Buffer,
    nvds_frame_meta: pyds.NvDsFrameMeta,
    data: BenchmarkData,
):
    """Alpha-compose the pre-built ``data.overlay_mat`` onto the frame at (0, 0)."""
    with nvds_to_gpu_mat(gst_buffer, nvds_frame_meta) as frame_mat:
        alpha_comp(frame_mat, data.overlay_mat, (0, 0))


def benchmark_cpu_draw_rectangles(
    gst_buffer: Gst.Buffer,
    nvds_frame_meta: pyds.NvDsFrameMeta,
    data: BenchmarkData,
):
    """Draw one rectangle per point in ``data.points`` with ``cv2.rectangle``."""
    with get_nvds_buf_surface(gst_buffer, nvds_frame_meta) as np_frame:
        for x, y in data.points:
            cv2.rectangle(
                np_frame,
                (x, y),
                (x + RECT_WIDTH, y + RECT_HEIGHT),
                RECT_COLOR,
                4,
            )


def benchmark_gpu_draw_rectangles(
    gst_buffer: Gst.Buffer,
    nvds_frame_meta: pyds.NvDsFrameMeta,
    data: BenchmarkData,
):
    """Draw one rectangle per point in ``data.points`` with savant's ``draw_rect``."""
    with nvds_to_gpu_mat(gst_buffer, nvds_frame_meta) as frame_mat:
        for x, y in data.points:
            draw_rect(
                frame_mat,
                (x, y, x + RECT_WIDTH, y + RECT_HEIGHT),
                RECT_COLOR,
                4,
            )


def benchmark_cpu_blur_faces(
    gst_buffer: Gst.Buffer,
    nvds_frame_meta: pyds.NvDsFrameMeta,
    data: BenchmarkData,
):
    """Blur a FACE_WIDTH x FACE_HEIGHT region at every point on the CPU."""
    with get_nvds_buf_surface(gst_buffer, nvds_frame_meta) as np_frame:
        for x, y in data.points:
            region = np_frame[y : y + FACE_HEIGHT, x : x + FACE_WIDTH]
            # Sigmas are passed by keyword: the 4th positional parameter of
            # cv2.GaussianBlur is ``dst``, not ``sigmaY``.
            np_frame[y : y + FACE_HEIGHT, x : x + FACE_WIDTH] = cv2.GaussianBlur(
                region,
                BLUR_KERNEL_SIZE,
                sigmaX=BLUR_SIGMA,
                sigmaY=BLUR_SIGMA,
            )


def benchmark_gpu_blur_faces(
    gst_buffer: Gst.Buffer,
    nvds_frame_meta: pyds.NvDsFrameMeta,
    data: BenchmarkData,
):
    """Apply ``data.cuda_blur_filter`` to a face-sized region at every point."""
    with nvds_to_gpu_mat(gst_buffer, nvds_frame_meta) as frame_mat:
        for x, y in data.points:
            apply_cuda_filter(
                data.cuda_blur_filter, frame_mat, (x, y, FACE_WIDTH, FACE_HEIGHT)
            )


def benchmark_gpu_blur_faces_in_cpu(
    gst_buffer: Gst.Buffer,
    nvds_frame_meta: pyds.NvDsFrameMeta,
    data: BenchmarkData,
):
    """Download each face region from the GpuMat, blur it on the CPU, upload it back."""
    with nvds_to_gpu_mat(gst_buffer, nvds_frame_meta) as frame_mat:
        for x, y in data.points:
            roi = cv2.cuda.GpuMat(frame_mat, (x, y, FACE_WIDTH, FACE_HEIGHT))
            blurred = cv2.GaussianBlur(
                roi.download(),
                BLUR_KERNEL_SIZE,
                sigmaX=BLUR_SIGMA,
                sigmaY=BLUR_SIGMA,
            )
            roi.upload(blurred)


def benchmark_gpu_download_upload(
    gst_buffer: Gst.Buffer,
    nvds_frame_meta: pyds.NvDsFrameMeta,
    data: BenchmarkData,
):
    """Download and immediately re-upload a RECT-sized region at every point."""
    with nvds_to_gpu_mat(gst_buffer, nvds_frame_meta) as frame_mat:
        for x, y in data.points:
            roi = cv2.cuda.GpuMat(frame_mat, (x, y, RECT_WIDTH, RECT_HEIGHT))
            part = roi.download()
            roi.upload(part)


BenchmarkFunc = Callable[[Gst.Buffer, pyds.NvDsFrameMeta, BenchmarkData], None]
# benchmark name -> (CPU implementation, GPU implementation); None = not available.
BENCHMARK_FUNCS: Dict[str, Tuple[Optional[BenchmarkFunc], Optional[BenchmarkFunc]]] = {
    'overlay': (benchmark_cpu_overlay, benchmark_gpu_overlay),
    'overlay-single': (None, benchmark_gpu_overlay_single),
    'draw-rectangles': (benchmark_cpu_draw_rectangles, benchmark_gpu_draw_rectangles),
    'blur-faces': (benchmark_cpu_blur_faces, benchmark_gpu_blur_faces),
    'blur-faces-in-cpu': (None, benchmark_gpu_blur_faces_in_cpu),
    'download-upload': (None, benchmark_gpu_download_upload),
}


def pad_buffer_probe(
    pad: Gst.Pad,
    info: Gst.PadProbeInfo,
    benchmark_func: BenchmarkFunc,
    data: BenchmarkData,
    measurements: List[float],
):
    """Buffer probe: time ``benchmark_func`` on every frame of the buffer.

    Fresh random points are stored in ``data.points`` before timing starts, so
    generating them is not part of the measurement.  The elapsed time of each
    call, multiplied by ``scale``, is appended to ``measurements``.

    Returns:
        ``Gst.PadProbeReturn.OK`` so the buffer continues downstream.
    """
    data.points = [
        (
            random.randint(0, POINT_AREA_WIDTH - RECT_WIDTH),
            random.randint(0, POINT_AREA_HEIGHT - RECT_HEIGHT),
        )
        for _ in range(RECT_N)
    ]
    gst_buffer: Gst.Buffer = info.get_buffer()
    nvds_batch_meta = pyds.gst_buffer_get_nvds_batch_meta(hash(gst_buffer))
    for nvds_frame_meta in nvds_frame_meta_iterator(nvds_batch_meta):
        # perf_counter is monotonic and has the best available resolution,
        # which matters for sub-millisecond intervals.
        started = time.perf_counter()
        benchmark_func(gst_buffer, nvds_frame_meta, data)
        finished = time.perf_counter()
        measurements.append((finished - started) * scale)

    return Gst.PadProbeReturn.OK


def is_aarch64():
    """Return True when the machine type reported by ``platform`` is 'aarch64'."""
    return platform.machine() == 'aarch64'


def bus_call(bus, message, loop):
    """Bus message handler: quit ``loop`` on EOS or ERROR, report warnings.

    Always returns True.
    """
    t = message.type
    if t == Gst.MessageType.EOS:
        sys.stdout.write("End-of-stream\n")
        loop.quit()
    elif t == Gst.MessageType.WARNING:
        err, debug = message.parse_warning()
        sys.stderr.write(f"Warning: {err}: {debug}\n")
    elif t == Gst.MessageType.ERROR:
        err, debug = message.parse_error()
        sys.stderr.write(f"Error: {err}: {debug}\n")
        loop.quit()
    return True


def _make_element(pipeline, factory_name, name, label=None):
    """Create a GStreamer element, add it to ``pipeline`` and return it.

    Raises:
        RuntimeError: if the element factory returns None.
    """
    print(f"Creating {label or name}")
    element = Gst.ElementFactory.make(factory_name, name)
    if element is None:
        raise RuntimeError(f'Unable to create element {name!r} ({factory_name})')
    pipeline.add(element)
    return element


def _link(src, dst):
    """Link two elements, raising RuntimeError on failure.

    The call is deliberately not wrapped in ``assert``: under ``python -O``
    asserts are removed, which would silently skip the linking itself.
    """
    if not src.link(dst):
        raise RuntimeError(f'Failed to link {src.get_name()} -> {dst.get_name()}')


def _link_pads(src_pad, sink_pad):
    """Link two pads, raising RuntimeError unless the result is PadLinkReturn.OK."""
    if src_pad.link(sink_pad) != Gst.PadLinkReturn.OK:
        raise RuntimeError(
            f'Failed to link pad {src_pad.get_name()} -> {sink_pad.get_name()}'
        )


def _build_pipeline(n_frames, output_filename):
    """Create, configure and link the benchmark pipeline.

    Args:
        n_frames: value for the ``num-buffers`` property of the test source.
        output_filename: when truthy, frames are encoded to H.264 and written
            to this file; otherwise they go to a ``fakesink``.

    Returns:
        ``(pipeline, workload)`` where ``workload`` is the ``identity`` element
        whose sink pad receives the measuring probe.
    """
    print("Creating Pipeline")
    pipeline = Gst.Pipeline()
    is_live = False  # always False in this script

    streammux = _make_element(pipeline, "nvstreammux", "streammux")
    source = _make_element(pipeline, "videotestsrc", "source")
    source_converter = _make_element(
        pipeline, "nvvideoconvert", "source-converter", "source converter"
    )
    source_capsfilter = _make_element(
        pipeline, "capsfilter", "source-capsfilter", "source capsfilter"
    )
    workload = _make_element(pipeline, "identity", "workload")
    streamdemux = _make_element(pipeline, "nvstreamdemux", "streamdemux")
    queue = _make_element(pipeline, "queue", "queue")

    converter = sink_capsfilter = encoder = parser = None
    if output_filename:
        converter = _make_element(pipeline, "nvvideoconvert", "converter")
        sink_capsfilter = _make_element(pipeline, "capsfilter", "sink_capsfilter")
        encoder = _make_element(pipeline, "nvv4l2h264enc", "encoder")
        parser = _make_element(pipeline, "h264parse", "parser")
        sink = _make_element(pipeline, "filesink", "sink")
    else:
        sink = _make_element(pipeline, "fakesink", "sink")

    source.set_property('num-buffers', n_frames)

    if is_live:
        streammux.set_property('live-source', 1)
    streammux.set_property('width', FRAME_WIDTH)
    streammux.set_property('height', FRAME_HEIGHT)
    streammux.set_property('batch-size', 1)
    streammux.set_property('batched-push-timeout', 4000000)

    sink.set_property("sync", 0)
    sink.set_property("qos", 0)
    sink.set_property("enable-last-sample", 0)
    if output_filename:
        sink.set_property("location", output_filename)

    if not is_aarch64():
        nv_buf_memory_type = int(pyds.NVBUF_MEM_CUDA_UNIFIED)
        source_converter.set_property("nvbuf-memory-type", nv_buf_memory_type)
        streammux.set_property("nvbuf-memory-type", nv_buf_memory_type)
        if output_filename:
            converter.set_property("nvbuf-memory-type", nv_buf_memory_type)

    caps_string = (
        'video/x-raw(memory:NVMM), format=RGBA, '
        f'width={FRAME_WIDTH}, height={FRAME_HEIGHT}'
    )
    source_capsfilter.set_property('caps', Gst.Caps.from_string(caps_string))
    if output_filename:
        # NOTE(review): sink_capsfilter is configured but never linked into the
        # chain below (queue -> converter -> encoder), exactly as in the
        # original script -- confirm whether that is intentional.
        sink_capsfilter.set_property('caps', Gst.Caps.from_string(caps_string))

    print("Linking elements in the Pipeline")

    _link(source, source_converter)
    _link(source_converter, source_capsfilter)
    _link_pads(
        source_capsfilter.get_static_pad('src'),
        streammux.get_request_pad('sink_0'),
    )
    _link(streammux, workload)
    _link(workload, streamdemux)

    streamdemux_src_pad = streamdemux.get_request_pad('src_0')
    # Three more demuxer pads are requested but left unlinked, as in the
    # original script.
    streamdemux.get_request_pad('src_1')
    streamdemux.get_request_pad('src_2')
    streamdemux.get_request_pad('src_3')
    _link_pads(streamdemux_src_pad, queue.get_static_pad('sink'))

    if output_filename:
        _link(queue, converter)
        _link(converter, encoder)
        _link(encoder, parser)
        _link(parser, sink)
    else:
        _link(queue, sink)

    return pipeline, workload


def _compute_metrics(measurements):
    """Summarise ``measurements`` as an ordered list of ``(name, value)`` pairs.

    The order matches the metrics.csv columns: min, max, mean, median, 80%,
    90%, 95%, 99%, stdev.  ``statistics.quantiles`` and ``statistics.stdev``
    need at least two samples, so for a single sample every percentile is that
    sample and the standard deviation is reported as 0.0.

    Raises:
        ValueError: if ``measurements`` is empty.
    """
    if not measurements:
        raise ValueError('no measurements to summarise')
    has_spread = len(measurements) >= 2
    metrics = [
        ('min', min(measurements)),
        ('max', max(measurements)),
        ('mean', statistics.mean(measurements)),
        ('median', statistics.median(measurements)),
    ]
    for label, n in _PERCENTILES:
        if has_spread:
            value = statistics.quantiles(measurements, n=n)[-1]
        else:
            value = measurements[0]
        metrics.append((label, value))
    metrics.append(('stdev', statistics.stdev(measurements) if has_spread else 0.0))
    return metrics


def _write_results(benchmark_name, device_name, n_frames, metrics, measurements):
    """Append the summary row to metrics.csv and dump the raw measurements."""
    row = [benchmark_name, device_name, str(n_frames)]
    row += [f'{val:.3f}' for _, val in metrics]
    with open('metrics.csv', 'a') as f:
        f.write(','.join(row))
        f.write('\n')
    measurements_filename = f'measurements-{benchmark_name}-{device_name}.txt'
    with open(measurements_filename, 'w') as f:
        f.writelines(f'{x}\n' for x in measurements)


def main(args):
    """Run one benchmark and report its timings.

    Args:
        args: ``sys.argv``-style list:
            ``[prog, benchmark-name, cpu|gpu, [n-frames], [output-filename]]``.
            Any device value other than ``'gpu'`` selects the CPU variant.

    Returns:
        None on success (exit status 0), 1 when the arguments are invalid, a
        required resource is missing or no frame was measured.
    """
    if len(args) <= 2:
        sys.stderr.write(f'{USAGE}\n')
        return 1
    benchmark_name = args[1]
    is_gpu = args[2] == 'gpu'
    if args[2] not in ('cpu', 'gpu'):
        sys.stderr.write(f'Unknown device {args[2]!r}; running the CPU variant\n')
    if benchmark_name not in BENCHMARK_FUNCS:
        sys.stderr.write(
            f'Available benchmark names: {", ".join(BENCHMARK_FUNCS.keys())}\n'
        )
        return 1
    benchmark_func = BENCHMARK_FUNCS[benchmark_name][int(is_gpu)]
    if benchmark_func is None:
        sys.stderr.write('Benchmark not implemented\n')
        return 1

    output_filename = None
    n_frames = 1
    if len(args) > 3:
        n_frames = int(args[3])
        if len(args) > 4:
            output_filename = args[4]

    Gst.init(None)
    pipeline, workload = _build_pipeline(n_frames, output_filename)

    # create an event loop and feed gstreamer bus messages to it
    loop = GLib.MainLoop()
    bus = pipeline.get_bus()
    bus.add_signal_watch()
    bus.connect("message", bus_call, loop)

    sink_pad = workload.get_static_pad("sink")
    measurements = []
    if not sink_pad:
        # Without the probe nothing can be measured, so stop here.
        sys.stderr.write("Unable to get sink pad\n")
        return 1

    overlay = cv2.imread('logo.png', cv2.IMREAD_UNCHANGED)
    if overlay is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        sys.stderr.write("Unable to read overlay image 'logo.png'\n")
        return 1
    benchmark_data = BenchmarkData(
        overlay=overlay,
        overlay_mat=cv2.cuda.GpuMat(overlay),
        points=[],
        cuda_blur_filter=cv2.cuda.createGaussianFilter(
            cv2.CV_8UC4,
            cv2.CV_8UC4,
            BLUR_KERNEL_SIZE,
            BLUR_SIGMA,
            BLUR_SIGMA,
        ),
    )
    sink_pad.add_probe(
        Gst.PadProbeType.BUFFER,
        pad_buffer_probe,
        benchmark_func,
        benchmark_data,
        measurements,
    )

    print("Starting pipeline")
    started = time.perf_counter()
    pipeline.set_state(Gst.State.PLAYING)
    try:
        loop.run()
    except KeyboardInterrupt:
        # Ctrl+C: stop the pipeline and still report what was measured.
        pass
    except Exception as exc:
        # Best effort, as before, but no longer silent.
        sys.stderr.write(f"Main loop failed: {exc}\n")
    finally:
        print("Exiting app\n")
        pipeline.set_state(Gst.State.NULL)
    elapsed = time.perf_counter() - started
    print(f"Elapsed: {elapsed:.2f}, framerate: {n_frames / elapsed:.2f}")

    if not measurements:
        sys.stderr.write("No frames were measured\n")
        return 1

    metrics = _compute_metrics(measurements)
    for name, val in metrics:
        print(f'{name}: {val:.3f}')
    device_name = "gpu" if is_gpu else "cpu"
    _write_results(benchmark_name, device_name, n_frames, metrics, measurements)


if __name__ == '__main__':
    sys.exit(main(sys.argv))
z_cju|>D#bEW#g~@C>*tx_j-@kG~s>KNS*nZ_`GR^w1R=vE-L>0Dacs)lovRE_x5o=yepFi_)fjqKDGpsgF{ML{27ic9n~HRP0UUOZz-{c2(6;CfBOEvbT}3m`~!x z+|SgMh}ueXRWZ%M(L%0xr$_HKPOR&SjGLF{9wR@a{5zrCx@0=D%ab@j!n z;SsG|M7tALG0tqJDuX=_*PsJKo^hA(#t94=)xd)Udk%jco!8;*vH`bS*qEnB%=b#f z@$P|CWI{zcf^kfR9b+ecguUgq;C=&))KXhP54Jo4vUocJHU!P>fl7^J+;y%t?SkAH z-kcr3OmG_mU3@D{?^*@JJ(Gy;kWc7DpBF$G6GFR>bf|)_P6A5#7|IfrNi;-k4^BU<#*Wfws>)}BkApKy<5{=J@AayYC51X2 z{I*hfvulGS!~5=-1mvahfo+pR!LQ#Tp-HJ3K3=J(VMS<_8(@7au67U@Dn;!@>zT@B zxO->C43waJM;~_whsMRQYgU2ndDPwO_shS7DOv*E030FApNX%d@_zW5@EEk#6tI~o zGH8F|>QX;6*(fs^wY1&?Ta!JR%nG?{PK(~Vlv8;=#^+(pP5MS^B!?zb<*Kbcvy^Z6 zkz{8f-NTjE0h*6TqWJEpkxi-Rh6-w4q0oveZNqE!s2j_IsvABwCZg8(UsBxy)$I(X zp=C8*=fix7>{UAb*PAMwT>(%IN6%13FOaI^h{$4REeLh%6~?luind z83^*zI!s>4^+)V1Lk!R0O>$Gm))f3w%h~SARyEuab~{0j0+M6G#|FjSW{0RT%SLyg zz@?^!P|Y$^mB3R2<()Nr2M!RE*maPTT+pI|cXLUKvzbE+|2qVR+aaI5NXpc2hW1xyN;@gN`{F{GF`>_$9kZO7HIge$pf|8x!~`}M73NE)68x5 zANhDS%?dl^L!hWD%3OdZjVb9A_fzi4R{38z|?DqVs~`z6hTH?8EN zBwG$mIwg&a>SxfjGX7DEYjVh=?Gx#Z3s6xfVH&`^BRstwKX6F-+2I(N^VCrGJ;}nS zNSjHxiF3bMKoLZ;70ib|*ZO)-b)HB5{@9Z))A?v}QS_F6!X#fzv=6ib$SX`~=6Kyz z1;U5a0^3gfEVc5x_nUSLTQ!OB0z<$1xmeipy7Q z$qT|^`W@k;t=Rcg2<$hcD!vrEW-dnHP21q6!3lwz%=lGcys0%UP>F*pqUU5K!sNtP zQgbP;ayCv11K#!gH62(5x`>gekrwVS>6FU9@Q()NZ_4iCJSj@>*1=a>Ik`uAe!Fd( z9E`aNTu117P%fNucaQVS*NL1F9i>t}efLF+&y-Rx;^{ORg;K(MNpbyU7vITQ<9LmN zQf9?%xEE#(qyJbA!P#izBc(+dzGr!2(Tl2F-g;B0=RWBlnqE4{7i|`5AER3qJv}>f z52%Abw^1&s!n``hS9-gAC>dhN9<$DmnPLSe>ck5Qn2D`C#yQGs$Wdl1?7k@JYR!fz zOm#dt$f6TsSrox8T7WT&mkwb+ISo6HrT>E9i(|#-=T|XC5Au4b4m|uxiY%DJ6GlTr z6&aKG#X-beTu@qhEzF}zwM?@5Ly>=23lp;Bk6g=Jr`Ih0ouYMcDL-CIq;TvoZJ-}P zo^dBy^hL8gv+#(!-pb0%%RMbYn{rflga}+G+P-z-T>7|BZWrnn7=z@zX*+zRaIUaEn zeP8@6zEo0p)AK8Pq>O%b8@bMDCgW!@EF1Al%HETJ%z#Y=&=N3)^R!b6j!;<=y1vX< z)hL1Wo90W2RYF#5R5G86r~k`@!p1f7sj!50ZaaX|N~F~iGWgdj^MuOL9utcGT}2r^ z3R{i*oVSWH8IRf8dE!j%C#g-Bq-{4~%~U@UGv=PpEw+XlIZr)+DirOxlzocK-01B% zD}WqfSf6Hmh)TE6le6r;N|pO8&D6{)@;)QkhLzbS!xVxN;Ia1B?=f2wL^NT~}AJTIQ~ 
zoJLRMNb&F_>@A&4e@=(w5D6sY5=>fUC0u=3eYy?o`;kSFq-CLjHHtclE775anI^?J!W zh{LY&-a|`^(m-W-9#Xh5Df9hP`D?=Z^Q#?NgbWHineQ33ruX$@j!ER5KaA}d+pLeBTFiXl#7)Y07UB@Z{|>RS zaNQOyF}ETx(an!x=t^17XDSDpaT~tSGaQH`THZc6+?eoT!g3Z`%}V%26{&^u(V-x$ ztR_z46-n`E>+4B;+g9@1dDhBp?Mfn~PK2J{a1=cWYQe70nKj-JH-cV&8Er;gA0G{d zu0;-Td^a_b7f(ZL@x(8-%;D%#{2*3K0u%R}?=BapQ$F>qr$~LL^ z+Z8kx$eu6$=m={hVva-@@3-c(T}+=6^Y3?dGt+`)cb46|@vN|L8CJtBBpsYA(TVa;WYpYej+25`tXj7(n^FflQvF=z>EBD+l>zpa zyOnm9nBBddhNeoC-E?S9w|KJIankMd{$$?Cuu}NA>HGfm_L&^dgc(1+@28sn(86A} z)BO%9|G;9FaZ(Dyxkyv6g8v~$_7%Gt<{m53k@ZWq+nz$nLV(0*ZC^Kex10f_0P-SQ zf4r)0aF};HUAJS%q>T|fVbHbs`mISLKTwsuu4@lrXyH)E^&R{3c`U4A@|DQtn&J~} zWJ(RzQK}DGmil}ot6&{Tg41l!eEaP7Gvz$#;2=@>e28%Dp$`1t9zR4*QN#Jhko)cC z#fJ;F@-jlLvxlNd|-ll#-(27LW?nQ8)Wyo8?@x0}q<2h?0KCvUA?PbaAgB|9qB zH;Qo|@fgmP7oRcRoW&AQ-YfoMbepHemZ16jK837IT55A4)|Z8JuT|jVVG~Fn=jRpK zR`*(*`8`x^=Zw9FNsNYO_v1wbZI+M?{yQ^5L=3V)zOzM)5P`18^X9Di`Gmxw(|$^w zIO#Y>GV|T&2wMF=D;i41w*34@lZnLUt^6M`W6&rISVXwyT$Sk07Y=J%**`ZcSyxjt zq4XUaRX8ViOAxE($%7-@4h%M>O-nmethllVhm~`8qw(eVPYb*6P@jK6+KT&Tvc@`z zFX;7_+Et4ddk9CiMR5kgb`r2JKez6opb5tC$H&*$O z$VO33F{`9|!!yI+Q+7*`(gpBzG& z?~O}$p78V3Z#LeAlL%H$3?);4CCq-p{2V(;$xF$xJ^U26kz*9&&0L6El1`1-xoeg| zQfVGalhrjBy~M$xnLLzhd8v!bCB6Cg+JVpI=G&ZU)L&C^20p_kLw9jRg+0k*l}Rj& z%S++kB+&kyfq!#G{OXLAzEpopQmdr$P~=N7@uadoux6jLLG;5D!*2c^5Mf(e2KieD zZ1Nr4TUvkMZYs-{9~rt#V?eT+@Y#vfObt_;11sAUS0_4@IDjrfzxzp8HzV5=dLish z6^2uq5q91^Htgl!{JgZhFf2OQKQqsoi|m^#VfDIRxXJUSyyI1H z#orOr>EH5}WZ$fkpsl@n#8d_^2hA!mjQtxfIz4LDxMAcWQDl1;x=iE0T)x|B=~Lc;sqD!XH=ri>E_8}v0LP+KA&HRQeUb_m|TwbX2@ z2)ZF>6xN#-$ss{_4T4*e-9n@9snd}rNhPY*9=^7`95%J)N-BD{ zq^cWOr)m;{zWif8ZVImC&yvSHHT(K#a%K{*#=G2!bb?W5?wxeLIfAaKQAT?IjI~`9 zS_;|t#bX>|Q#8*=U)t2LrdD(dx1mykO#_p+CZMle` zD_M4jUdN1A>h-FB!Uqbp zOUMHk`PZvb-wY$M{BR;45(DX0na?1j=}1*p!1zol9o^DXD;6RX$)KYN={TB6G}-FA zP-IG)RFtgQ!bg5ocfx_n$5XF(S{Pt>m-wmjZR`3Qk1ggK=2fjz2pB%N$~fuaMFm~$ ztEbl_OJ=rGn4x*x-ptagEQDjJeY49kX ztBPZ>AE3z3&mA8RYsfx~OL)#35y|w7oEQ)1Q1YInNtfptDa+wF2-0NqBj;G=#jT}- 
zmWM=SWH=z>k(jpvMUBe+Yfq6!meRHKKFu`?bF?Mu3YqEb4tbNJ^DhQut>gHTxbJfu z?tvy6$siFFY;i8)@+!4f0A|k=+KpKgMD!xrjB9Gb%!w^m1q!8#k302NQ@QG@*7Mc$ zzbdjzBWR4Mkp#vxI&eBcYMLN^0o+mraof5&4My&=PD>j>-Gq(`7r91kOIf7{WiO4P zp_=%dI|lK`V@8R(1Dc@60C~k!RbdurdXn@3Y@Nc)f#=8F!EGzlRMbECXpNVGig>(B zBi3hRPBD0@OsFaJ{YBP2E=2EA)RTisXh&q(#0A0&t~n%8wTa5A8joxKa0Uz~^w+Ew zxr@OAC}?1+q*6?EF6G!7Bq!3D7gzXF1CN&ynTfk&1Wn!1bZ(WXwKd$5C~OwL;-#G1 zn~6S6hrIuj!1Jm^n?5@<>dbsen!@x520=Rr_Oblclks%(Z2<|FtKH&0zp&FqH> z%72<38dyr`4%*E#@>@OoPn6iuBPXu8$9K#)BX1gptK(^3j)^e(*cm!&2T=wN?16c^ z9sEdFSy?!8EkqGlhk`wKj9qj?KnhvY1`8BPvG^GWpYNJ2|weVrK)rx%T_vfG2cU39{#Ac|ZPvahMU%B98$&kO; z8_B}yqc%l1kO(m4x6a26<5>3x3+4-GEBHPWydV8rIvX}ZBVv8k(ez$t)4u<+k9Rd4 zB|FT!R=i?!M-@1`Rscxv~x;25f%>T`Vqz#FL z9kLQF&ksQ};(nop&JTZfs8z9>n)vxdIjDF?tBHK*UrJq#(-amFF02jrN#V_uD?vPZ zN$dx#pb#%}deSJX$s@m`VOtf`0(j+ylka@AY`&kIWLl|!>q1&eob8|VCU!T^KtO<8 zJO^cSj}N>ZmV)_A|HJ)!NL3rztL^ITDR*2F(drKq9DSyZ(+Y;)s+V{XRgA}*BA(1h z#Wpj6yBAGYD$MH-!@3t+Ca-F(M~nxGSxG;7RK4_>KDkF3Pj>&-O^zP<&CLhrpuny# zkFqZizwx@M?OG6mW~VN50qJ!9)e~}It;aHFFm-uca5pN*#yG5%SqjT{AGDT-lUGln zMaxRpPgRsg%a$0uq^xTauWI7c`dq=c7QEY+u1D_Gybq6TDFA=*)D<#r%>nKI7Hv$; zDj}~&;A6h_vQT_MbNN%PF!0%IHDD^g__0nmoYzPQ?}Yq_!1Z6BE-$2<1W>pPUXQkV zwlWUmxH)-r1hQjpKTLgk600;2Ytv%E=OzpONB?7^D}=)R1>I_#_pYbq%c$VH2V4d@ zOFae`%u33?Jw}S)chWp$VsIX(-Qd$3<1Z4vs08B!L3gW+x(Pjpaf-Y>YkD5ZcHjd~v*Hm}A56b>Rz5rEpD~rnQT#U9ZF=@E=UYCX~Txsbz!7Ode7X_gSM?_=U^wvut(MaP+k{rXJYiqV;BJ2jvFgVC}F*hj~1R zuT^e(JOzdR=@nuppi!o(kwq5Soyd|^_ltLbYA}cQ13yBm>y%_=@6S2xunrUYCVR~c zZ!TZrl8#>1-}4-;YE$as^{9WP0JNkayj8GviUNf@N9Y;>-ya=*yV0x5&$V$$DWU@u zr5=WWnx{4Hr^PaTb|soonJ$gj$L|e&tmUQ^V0m|b_c99UjTSpBFo;{0G5Clu)1&a$y2Kuf2!A+%C!v<2=#!B@Ef*Fg zc%ep>c1@@WQDk7WnFL|}w9I?RRtk5G@aW~D%%CzyeTwSzEc`5B3W9H;;4xK3a52la z>!Hawa;I18SoSlniyY&!)BqDyZR_+tjU9d5%^LEPoyS)nsO~K5N??>XTi~Yi z&;j+(apyMtpyWW+NC_Tr@9U+ueOrIeMnB#7jCv9=2!pk8oderprb z7S0|t2|C;EJSwcz69{t&F~k{#p&-UmG!QXJ4ZHS!GJX$#&(DmnXn`G$MdD}Q-@u2J z#+O=&9~}vD2Hy~pEjGcPd4?EXJop!uR}EfC31i|#rgo#JvE&4!h9ZR?=Hz09sMSS5 
zy|*>&^KL&g7~xxT(2eretnJ^8Drhi%y9-3BEw6&COFe(Y-8f_NvN5U@l94tm{5v9# zzrWffU5$P5*$f!PRvhd%_Rf5^C|Ay0NlPgUV2G*zc@@y*=8Jf5=9Obj_Ixnv>9a-L zdv^6IsfDXxOkML22Nm(5?O4SbArvy2S{Elrp4ITeG2Wc>tu2MuU+D2=k^FSX@6Sk! zN(_y;Z_h7!=3y7_?k`^z@SRLgoJssc>pFB~H)nuxw)_apZeVG?ZIj*<*B-du;%hyP20D>tXF<4m3qkq#FvQFteEODg=<#!EtM6_vUyWGP2M2A} zu3trWkrS8L|G-qZ8#YnlU0W@oO}TUJt7P_?YQqEmtDY@Ed!G7ca+l$n^}~L3Lsj|g z8A=<8cLkcX*t}RHwj=>PwOt7K@HMwjUHj=CrnD%RY@vTfMG9~QtC6|8{W#rvcE)ev zf1KDGWM|7GDHATvpPxTS0fexD9$pv62nxb%{d{#HXrgLebx z-7aF15;d2e7mkZI_Vo&Nzn80!niXa4x_&kB70(!IuGJ6N4eBosy@{Bdvrhge@$34N zY3S{>eS2uyHZ53E0^?Q@`r7dOzxv=LR|x2G%UbIFC~aHORkx^!v#C2)Q6YXd);RE! z!#X3_`6jt7&)xU0!f8oQo5rL#WQgx8pF7JhD_ag#i0BNT&oQW_gF zhP)pcKl8&<&`!Ndg{k1v)D^kuc{p54s}yszIA{T2-t_bWIXrlAu)J}Vm7<2yKtJD- zZ1%C-==4oBFQm%eilKKMc+B;wa$}oI4Og;0lJyV2xU$br;%O9x8R!i7>cTZJZBEUa zct}Idd0oH!`P8KTTcfHeEfSXcy|WgsmL2lf9C-2NxK&x8crB*L^8ir+Kkm_JD{_m}CIQ?HRSZNwZAsLeK+=xY0$muxuS4y2{lSkU?eyQQ z%svnED8knWhrSA+DFq;QkxBPdwGba*1YuwHtrORvQoGOa)TDy2JO-`?~dF9PGQbj zu@~wzn`?kh%|`dBXmF3AY}7FzfArwsaw0YkHU*z#PzNazcJ71he5IXEss_e1#~6?q z!H63&Q1{tyNkD`E-^5!?3EE)%kc^aB>)aj>jGjh3p2neV|0Cc5qQM!(lSh#-6fY6` z1Q;v;4+tMi>K!G}4p>*IXbWJ|IfM7^gaX_|IVy z4X{=2`hGqs#wE^oOSbeCmmVWy`O4vN!T&Ny1MqvWh32pw=$6xn9-rL)RR+ggeAN(7 z)=3rZnF=su=q&`9vfB9_Rwx1)K6Q4*6^(oE}#f_*NhK~)7~wr1g?gtdfV z0~%vB&}n88R^3eM@vQ)`2qX&L2E>t)@x}cX=3bGZ_f4-Fc7})B3YVr3fDJRr&Ju|khy&u? z(JdT29Hl_av~Z(}iq8EBeMv)CV`ld@z{e`5`XC!sa}%eY9ajKp5U)tWT)^rk`SJH% zRQR(1;96n{jR(#O8&CKd+2V*tg`UX=M35Uua{|0be}w*bvv@lU5#i6?pp^mf*U!&) zfWo%`^NbXlo#!b_3%daaS_d}5K@L?jeipAp30SSF6NZXL4nBz9n#^5l-12)9UA&ig zncH-iQaXOCLvSRB1lASQ3&d`!?HzR5jv&;_-X|#E9{4pZ7k{CrMR%%S$vXS;)ZPa1 ziAo)MuOX-$RxfRYAQz@rK;?AtlMM1Bw?czhvrI^#=QeOprZ0O7WOV6O2bN=kUOZM! kGB$^^>XC2cAkf>J`L?KL?Ii{_V2lLGORGrLNtlNHA9yWr=l}o! 
#!/usr/bin/env bash
#
# Run every benchmark of benchmark.py inside the savant-deepstream container,
# first all GPU variants, then all CPU variants.
#
# Usage: ./run_benchmarks.sh [frame-num]
#   frame-num  number of frames per benchmark (default: 1000)
#
# metrics.csv in the current directory is recreated with a header row; the
# current directory is mounted at /gpumat, which is also the container's
# working directory, so benchmark.py appends its result rows to the same file.

# Use the first argument when it is set and non-empty, otherwise 1000.
FRAME_NUM="${1:-1000}"

GPU_BENCHMARK_NAMES=(
  "overlay"
  "overlay-single"
  "draw-rectangles"
  "blur-faces"
  "blur-faces-in-cpu"
  "download-upload"
)
CPU_BENCHMARK_NAMES=(
  "overlay"
  "draw-rectangles"
  "blur-faces"
)

# run_benchmark LABEL DEVICE NAME
#   LABEL   device name as shown in the log line ("GPU" or "CPU")
#   DEVICE  device argument passed to benchmark.py ("gpu" or "cpu")
#   NAME    benchmark name passed to benchmark.py
run_benchmark() {
  local label="${1}"
  local device="${2}"
  local name="${3}"

  echo
  date
  echo "Running ${label} benchmark ${name}"
  docker run \
    --name test \
    --rm -it \
    --gpus all \
    -e GST_DEBUG=1 \
    -e LOGLEVEL=INFO \
    -e PYTHONUNBUFFERED=1 \
    --workdir /gpumat \
    --entrypoint ./benchmark.py \
    -v "$(pwd):/gpumat" \
    savant-deepstream:0.1.1-6.1.1-base "${name}" "${device}" "${FRAME_NUM}"
}

echo "name,device,frame_num,min,max,mean,median,80%,90%,95%,99%,stdev" >metrics.csv

for BENCHMARK_NAME in "${GPU_BENCHMARK_NAMES[@]}"; do
  run_benchmark "GPU" "gpu" "${BENCHMARK_NAME}"
done

for BENCHMARK_NAME in "${CPU_BENCHMARK_NAMES[@]}"; do
  run_benchmark "CPU" "cpu" "${BENCHMARK_NAME}"
done
date