@@ -310,6 +310,187 @@ define void @test_2x32bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #
310310  ret  void 
311311}
312312
313+ ; Extra use of the get_active_lane_mask from an extractelement, which is replaced with ptest_first. 
314+ 
315+ define  void  @test_2x8bit_mask_with_extracts_and_ptest (i64  %i , i64  %n ) {
316+ ; CHECK-SVE-LABEL: test_2x8bit_mask_with_extracts_and_ptest: 
317+ ; CHECK-SVE:       // %bb.0: // %entry 
318+ ; CHECK-SVE-NEXT:    whilelo p1.b, x0, x1 
319+ ; CHECK-SVE-NEXT:    b.pl .LBB11_2 
320+ ; CHECK-SVE-NEXT:  // %bb.1: // %if.then 
321+ ; CHECK-SVE-NEXT:    punpklo p0.h, p1.b 
322+ ; CHECK-SVE-NEXT:    punpkhi p1.h, p1.b 
323+ ; CHECK-SVE-NEXT:    b use 
324+ ; CHECK-SVE-NEXT:  .LBB11_2: // %if.end 
325+ ; CHECK-SVE-NEXT:    ret 
326+ ; 
327+ ; CHECK-SVE2p1-SME2-LABEL: test_2x8bit_mask_with_extracts_and_ptest: 
328+ ; CHECK-SVE2p1-SME2:       // %bb.0: // %entry 
329+ ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.h, p1.h }, x0, x1 
330+ ; CHECK-SVE2p1-SME2-NEXT:    ptrue p2.b 
331+ ; CHECK-SVE2p1-SME2-NEXT:    uzp1 p3.b, p0.b, p1.b 
332+ ; CHECK-SVE2p1-SME2-NEXT:    ptest p2, p3.b 
333+ ; CHECK-SVE2p1-SME2-NEXT:    b.pl .LBB11_2 
334+ ; CHECK-SVE2p1-SME2-NEXT:  // %bb.1: // %if.then 
335+ ; CHECK-SVE2p1-SME2-NEXT:    b use 
336+ ; CHECK-SVE2p1-SME2-NEXT:  .LBB11_2: // %if.end 
337+ ; CHECK-SVE2p1-SME2-NEXT:    ret 
338+ entry:
339+     %r  = call  <vscale x 16  x i1 > @llvm.get.active.lane.mask.nxv16i1.i32 (i64  %i , i64  %n )
340+     %v0  = call  <vscale x 8  x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16  x i1 > %r , i64  0 )
341+     %v1  = call  <vscale x 8  x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16  x i1 > %r , i64  8 )
342+     %elt0  = extractelement  <vscale x 16  x i1 > %r , i32  0 
343+     br  i1  %elt0 , label  %if.then , label  %if.end 
344+ 
345+ if.then:
346+     tail  call  void  @use (<vscale x 8  x i1 > %v0 , <vscale x 8  x i1 > %v1 )
347+     br  label  %if.end 
348+ 
349+ if.end:
350+     ret  void 
351+ }
352+ 
353+ ; Extra use of the get_active_lane_mask from an extractelement, which is 
354+ ; replaced with ptest_first and reinterpret_casts because the extract is not nxv16i1. 
355+ 
356+ define  void  @test_2x8bit_mask_with_extracts_and_reinterpret_casts (i64  %i , i64  %n ) {
357+ ; CHECK-SVE-LABEL: test_2x8bit_mask_with_extracts_and_reinterpret_casts: 
358+ ; CHECK-SVE:       // %bb.0: // %entry 
359+ ; CHECK-SVE-NEXT:    whilelo p1.h, x0, x1 
360+ ; CHECK-SVE-NEXT:    b.pl .LBB12_2 
361+ ; CHECK-SVE-NEXT:  // %bb.1: // %if.then 
362+ ; CHECK-SVE-NEXT:    punpklo p0.h, p1.b 
363+ ; CHECK-SVE-NEXT:    punpkhi p1.h, p1.b 
364+ ; CHECK-SVE-NEXT:    b use 
365+ ; CHECK-SVE-NEXT:  .LBB12_2: // %if.end 
366+ ; CHECK-SVE-NEXT:    ret 
367+ ; 
368+ ; CHECK-SVE2p1-SME2-LABEL: test_2x8bit_mask_with_extracts_and_reinterpret_casts: 
369+ ; CHECK-SVE2p1-SME2:       // %bb.0: // %entry 
370+ ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.s, p1.s }, x0, x1 
371+ ; CHECK-SVE2p1-SME2-NEXT:    ptrue p2.h 
372+ ; CHECK-SVE2p1-SME2-NEXT:    uzp1 p3.h, p0.h, p1.h 
373+ ; CHECK-SVE2p1-SME2-NEXT:    ptest p2, p3.b 
374+ ; CHECK-SVE2p1-SME2-NEXT:    b.pl .LBB12_2 
375+ ; CHECK-SVE2p1-SME2-NEXT:  // %bb.1: // %if.then 
376+ ; CHECK-SVE2p1-SME2-NEXT:    b use 
377+ ; CHECK-SVE2p1-SME2-NEXT:  .LBB12_2: // %if.end 
378+ ; CHECK-SVE2p1-SME2-NEXT:    ret 
379+ entry:
380+     %r  = call  <vscale x 8  x i1 > @llvm.get.active.lane.mask.nxv8i1.i64 (i64  %i , i64  %n )
381+     %v0  = tail  call  <vscale x 4  x i1 > @llvm.vector.extract.nxv4i1.nxv8i1 (<vscale x 8  x i1 > %r , i64  0 )
382+     %v1  = tail  call  <vscale x 4  x i1 > @llvm.vector.extract.nxv4i1.nxv8i1 (<vscale x 8  x i1 > %r , i64  4 )
383+     %elt0  = extractelement  <vscale x 8  x i1 > %r , i64  0 
384+     br  i1  %elt0 , label  %if.then , label  %if.end 
385+ 
386+ if.then:
387+     tail  call  void  @use (<vscale x 4  x i1 > %v0 , <vscale x 4  x i1 > %v1 )
388+     br  label  %if.end 
389+ 
390+ if.end:
391+     ret  void 
392+ }
393+ 
394+ define  void  @test_4x4bit_mask_with_extracts_and_ptest (i64  %i , i64  %n ) {
395+ ; CHECK-SVE-LABEL: test_4x4bit_mask_with_extracts_and_ptest: 
396+ ; CHECK-SVE:       // %bb.0: // %entry 
397+ ; CHECK-SVE-NEXT:    whilelo p0.b, x0, x1 
398+ ; CHECK-SVE-NEXT:    b.pl .LBB13_2 
399+ ; CHECK-SVE-NEXT:  // %bb.1: // %if.then 
400+ ; CHECK-SVE-NEXT:    punpklo p1.h, p0.b 
401+ ; CHECK-SVE-NEXT:    punpkhi p3.h, p0.b 
402+ ; CHECK-SVE-NEXT:    punpklo p0.h, p1.b 
403+ ; CHECK-SVE-NEXT:    punpkhi p1.h, p1.b 
404+ ; CHECK-SVE-NEXT:    punpklo p2.h, p3.b 
405+ ; CHECK-SVE-NEXT:    punpkhi p3.h, p3.b 
406+ ; CHECK-SVE-NEXT:    b use 
407+ ; CHECK-SVE-NEXT:  .LBB13_2: // %if.end 
408+ ; CHECK-SVE-NEXT:    ret 
409+ ; 
410+ ; CHECK-SVE2p1-SME2-LABEL: test_4x4bit_mask_with_extracts_and_ptest: 
411+ ; CHECK-SVE2p1-SME2:       // %bb.0: // %entry 
412+ ; CHECK-SVE2p1-SME2-NEXT:    cnth x8 
413+ ; CHECK-SVE2p1-SME2-NEXT:    adds x8, x0, x8 
414+ ; CHECK-SVE2p1-SME2-NEXT:    csinv x8, x8, xzr, lo 
415+ ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.s, p1.s }, x0, x1 
416+ ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p2.s, p3.s }, x8, x1 
417+ ; CHECK-SVE2p1-SME2-NEXT:    uzp1 p4.h, p0.h, p1.h 
418+ ; CHECK-SVE2p1-SME2-NEXT:    uzp1 p5.h, p2.h, p3.h 
419+ ; CHECK-SVE2p1-SME2-NEXT:    uzp1 p4.b, p4.b, p5.b 
420+ ; CHECK-SVE2p1-SME2-NEXT:    ptrue p5.b 
421+ ; CHECK-SVE2p1-SME2-NEXT:    ptest p5, p4.b 
422+ ; CHECK-SVE2p1-SME2-NEXT:    b.pl .LBB13_2 
423+ ; CHECK-SVE2p1-SME2-NEXT:  // %bb.1: // %if.then 
424+ ; CHECK-SVE2p1-SME2-NEXT:    b use 
425+ ; CHECK-SVE2p1-SME2-NEXT:  .LBB13_2: // %if.end 
426+ ; CHECK-SVE2p1-SME2-NEXT:    ret 
427+ entry:
428+     %r  = call  <vscale x 16  x i1 > @llvm.get.active.lane.mask.nxv16i1.i32 (i64  %i , i64  %n )
429+     %v0  = call  <vscale x 4  x i1 > @llvm.vector.extract.nxv4i1.nxv16i1.i64 (<vscale x 16  x i1 > %r , i64  0 )
430+     %v1  = call  <vscale x 4  x i1 > @llvm.vector.extract.nxv4i1.nxv16i1.i64 (<vscale x 16  x i1 > %r , i64  4 )
431+     %v2  = call  <vscale x 4  x i1 > @llvm.vector.extract.nxv4i1.nxv16i1.i64 (<vscale x 16  x i1 > %r , i64  8 )
432+     %v3  = call  <vscale x 4  x i1 > @llvm.vector.extract.nxv4i1.nxv16i1.i64 (<vscale x 16  x i1 > %r , i64  12 )
433+     %elt0  = extractelement  <vscale x 16  x i1 > %r , i32  0 
434+     br  i1  %elt0 , label  %if.then , label  %if.end 
435+ 
436+ if.then:
437+     tail  call  void  @use (<vscale x 4  x i1 > %v0 , <vscale x 4  x i1 > %v1 , <vscale x 4  x i1 > %v2 , <vscale x 4  x i1 > %v3 )
438+     br  label  %if.end 
439+ 
440+ if.end:
441+     ret  void 
442+ }
443+ 
444+ define  void  @test_4x2bit_mask_with_extracts_and_reinterpret_casts (i64  %i , i64  %n ) {
445+ ; CHECK-SVE-LABEL: test_4x2bit_mask_with_extracts_and_reinterpret_casts: 
446+ ; CHECK-SVE:       // %bb.0: // %entry 
447+ ; CHECK-SVE-NEXT:    whilelo p0.h, x0, x1 
448+ ; CHECK-SVE-NEXT:    b.pl .LBB14_2 
449+ ; CHECK-SVE-NEXT:  // %bb.1: // %if.then 
450+ ; CHECK-SVE-NEXT:    punpklo p1.h, p0.b 
451+ ; CHECK-SVE-NEXT:    punpkhi p3.h, p0.b 
452+ ; CHECK-SVE-NEXT:    punpklo p0.h, p1.b 
453+ ; CHECK-SVE-NEXT:    punpkhi p1.h, p1.b 
454+ ; CHECK-SVE-NEXT:    punpklo p2.h, p3.b 
455+ ; CHECK-SVE-NEXT:    punpkhi p3.h, p3.b 
456+ ; CHECK-SVE-NEXT:    b use 
457+ ; CHECK-SVE-NEXT:  .LBB14_2: // %if.end 
458+ ; CHECK-SVE-NEXT:    ret 
459+ ; 
460+ ; CHECK-SVE2p1-SME2-LABEL: test_4x2bit_mask_with_extracts_and_reinterpret_casts: 
461+ ; CHECK-SVE2p1-SME2:       // %bb.0: // %entry 
462+ ; CHECK-SVE2p1-SME2-NEXT:    cntw x8 
463+ ; CHECK-SVE2p1-SME2-NEXT:    adds x8, x0, x8 
464+ ; CHECK-SVE2p1-SME2-NEXT:    csinv x8, x8, xzr, lo 
465+ ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.d, p1.d }, x0, x1 
466+ ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p2.d, p3.d }, x8, x1 
467+ ; CHECK-SVE2p1-SME2-NEXT:    uzp1 p4.s, p0.s, p1.s 
468+ ; CHECK-SVE2p1-SME2-NEXT:    uzp1 p5.s, p2.s, p3.s 
469+ ; CHECK-SVE2p1-SME2-NEXT:    uzp1 p4.h, p4.h, p5.h 
470+ ; CHECK-SVE2p1-SME2-NEXT:    ptrue p5.h 
471+ ; CHECK-SVE2p1-SME2-NEXT:    ptest p5, p4.b 
472+ ; CHECK-SVE2p1-SME2-NEXT:    b.pl .LBB14_2 
473+ ; CHECK-SVE2p1-SME2-NEXT:  // %bb.1: // %if.then 
474+ ; CHECK-SVE2p1-SME2-NEXT:    b use 
475+ ; CHECK-SVE2p1-SME2-NEXT:  .LBB14_2: // %if.end 
476+ ; CHECK-SVE2p1-SME2-NEXT:    ret 
477+ entry:
478+     %r  = call  <vscale x 8  x i1 > @llvm.get.active.lane.mask.nxv8i1.i32 (i64  %i , i64  %n )
479+     %v0  = call  <vscale x 2  x i1 > @llvm.vector.extract.nxv2i1.nxv8i1.i64 (<vscale x 8  x i1 > %r , i64  0 )
480+     %v1  = call  <vscale x 2  x i1 > @llvm.vector.extract.nxv2i1.nxv8i1.i64 (<vscale x 8  x i1 > %r , i64  2 )
481+     %v2  = call  <vscale x 2  x i1 > @llvm.vector.extract.nxv2i1.nxv8i1.i64 (<vscale x 8  x i1 > %r , i64  4 )
482+     %v3  = call  <vscale x 2  x i1 > @llvm.vector.extract.nxv2i1.nxv8i1.i64 (<vscale x 8  x i1 > %r , i64  6 )
483+     %elt0  = extractelement  <vscale x 8  x i1 > %r , i32  0 
484+     br  i1  %elt0 , label  %if.then , label  %if.end 
485+ 
486+ if.then:
487+     tail  call  void  @use (<vscale x 2  x i1 > %v0 , <vscale x 2  x i1 > %v1 , <vscale x 2  x i1 > %v2 , <vscale x 2  x i1 > %v3 )
488+     br  label  %if.end 
489+ 
490+ if.end:
491+     ret  void 
492+ }
493+ 
313494declare  void  @use (...)
314495
315496attributes  #0  = { nounwind  }
0 commit comments