@@ -355,15 +355,41 @@ define i128 @i128_mul(i128 %x, i128 %y) {
 define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_checked_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    add x4, sp, #8
-; CHECK-NEXT:    bl __muloti4
-; CHECK-NEXT:    ldr x8, [sp, #8]
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    asr x8, x1, #63
+; CHECK-NEXT:    asr x11, x3, #63
+; CHECK-NEXT:    umulh x13, x0, x2
+; CHECK-NEXT:    mul x9, x2, x8
+; CHECK-NEXT:    umulh x10, x2, x8
+; CHECK-NEXT:    umulh x12, x11, x0
+; CHECK-NEXT:    mul x14, x1, x2
+; CHECK-NEXT:    add x10, x10, x9
+; CHECK-NEXT:    madd x8, x3, x8, x10
+; CHECK-NEXT:    madd x10, x11, x1, x12
+; CHECK-NEXT:    mul x11, x11, x0
+; CHECK-NEXT:    umulh x12, x1, x2
+; CHECK-NEXT:    mul x15, x0, x3
+; CHECK-NEXT:    add x10, x10, x11
+; CHECK-NEXT:    adds x9, x11, x9
+; CHECK-NEXT:    umulh x16, x0, x3
+; CHECK-NEXT:    adc x10, x10, x8
+; CHECK-NEXT:    adds x8, x14, x13
+; CHECK-NEXT:    cinc x12, x12, hs
+; CHECK-NEXT:    mul x11, x1, x3
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    umulh x13, x1, x3
+; CHECK-NEXT:    mov x1, x8
+; CHECK-NEXT:    cinc x14, x16, hs
+; CHECK-NEXT:    adds x12, x12, x14
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    cset w14, hs
+; CHECK-NEXT:    adds x11, x11, x12
+; CHECK-NEXT:    asr x12, x8, #63
+; CHECK-NEXT:    adc x13, x13, x14
+; CHECK-NEXT:    adds x9, x11, x9
+; CHECK-NEXT:    adc x10, x13, x10
+; CHECK-NEXT:    cmp x9, x12
+; CHECK-NEXT:    ccmp x10, x12, #0, eq
 ; CHECK-NEXT:    cset w2, eq
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -378,15 +404,41 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
 define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_overflowing_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    add x4, sp, #8
-; CHECK-NEXT:    bl __muloti4
-; CHECK-NEXT:    ldr x8, [sp, #8]
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    asr x8, x1, #63
+; CHECK-NEXT:    asr x11, x3, #63
+; CHECK-NEXT:    umulh x13, x0, x2
+; CHECK-NEXT:    mul x9, x2, x8
+; CHECK-NEXT:    umulh x10, x2, x8
+; CHECK-NEXT:    umulh x12, x11, x0
+; CHECK-NEXT:    mul x14, x1, x2
+; CHECK-NEXT:    add x10, x10, x9
+; CHECK-NEXT:    madd x8, x3, x8, x10
+; CHECK-NEXT:    madd x10, x11, x1, x12
+; CHECK-NEXT:    mul x11, x11, x0
+; CHECK-NEXT:    umulh x12, x1, x2
+; CHECK-NEXT:    mul x15, x0, x3
+; CHECK-NEXT:    add x10, x10, x11
+; CHECK-NEXT:    adds x9, x11, x9
+; CHECK-NEXT:    umulh x16, x0, x3
+; CHECK-NEXT:    adc x10, x10, x8
+; CHECK-NEXT:    adds x8, x14, x13
+; CHECK-NEXT:    cinc x12, x12, hs
+; CHECK-NEXT:    mul x11, x1, x3
+; CHECK-NEXT:    adds x8, x15, x8
+; CHECK-NEXT:    umulh x13, x1, x3
+; CHECK-NEXT:    mov x1, x8
+; CHECK-NEXT:    cinc x14, x16, hs
+; CHECK-NEXT:    adds x12, x12, x14
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    cset w14, hs
+; CHECK-NEXT:    adds x11, x11, x12
+; CHECK-NEXT:    asr x12, x8, #63
+; CHECK-NEXT:    adc x13, x13, x14
+; CHECK-NEXT:    adds x9, x11, x9
+; CHECK-NEXT:    adc x10, x13, x10
+; CHECK-NEXT:    cmp x9, x12
+; CHECK-NEXT:    ccmp x10, x12, #0, eq
 ; CHECK-NEXT:    cset w2, ne
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -400,26 +452,46 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
 define i128 @i128_saturating_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_saturating_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    add x4, sp, #8
-; CHECK-NEXT:    mov x19, x3
-; CHECK-NEXT:    mov x20, x1
-; CHECK-NEXT:    str xzr, [sp, #8]
-; CHECK-NEXT:    bl __muloti4
-; CHECK-NEXT:    eor x8, x19, x20
-; CHECK-NEXT:    ldr x9, [sp, #8]
-; CHECK-NEXT:    asr x8, x8, #63
-; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    cmp x9, #0
-; CHECK-NEXT:    eor x10, x8, #0x7fffffffffffffff
-; CHECK-NEXT:    csinv x0, x0, x8, eq
-; CHECK-NEXT:    csel x1, x10, x1, ne
-; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    asr x8, x1, #63
+; CHECK-NEXT:    asr x11, x3, #63
+; CHECK-NEXT:    umulh x13, x0, x2
+; CHECK-NEXT:    mul x9, x2, x8
+; CHECK-NEXT:    umulh x10, x2, x8
+; CHECK-NEXT:    umulh x12, x11, x0
+; CHECK-NEXT:    mul x14, x1, x2
+; CHECK-NEXT:    add x10, x10, x9
+; CHECK-NEXT:    madd x8, x3, x8, x10
+; CHECK-NEXT:    madd x10, x11, x1, x12
+; CHECK-NEXT:    mul x11, x11, x0
+; CHECK-NEXT:    umulh x12, x1, x2
+; CHECK-NEXT:    mul x16, x0, x3
+; CHECK-NEXT:    add x10, x10, x11
+; CHECK-NEXT:    adds x9, x11, x9
+; CHECK-NEXT:    umulh x15, x0, x3
+; CHECK-NEXT:    adc x8, x10, x8
+; CHECK-NEXT:    adds x10, x14, x13
+; CHECK-NEXT:    cinc x12, x12, hs
+; CHECK-NEXT:    mul x17, x1, x3
+; CHECK-NEXT:    adds x10, x16, x10
+; CHECK-NEXT:    umulh x11, x1, x3
+; CHECK-NEXT:    cinc x13, x15, hs
+; CHECK-NEXT:    adds x12, x12, x13
+; CHECK-NEXT:    cset w13, hs
+; CHECK-NEXT:    adds x12, x17, x12
+; CHECK-NEXT:    adc x11, x11, x13
+; CHECK-NEXT:    adds x9, x12, x9
+; CHECK-NEXT:    asr x12, x10, #63
+; CHECK-NEXT:    mul x13, x0, x2
+; CHECK-NEXT:    adc x8, x11, x8
+; CHECK-NEXT:    eor x11, x3, x1
+; CHECK-NEXT:    eor x8, x8, x12
+; CHECK-NEXT:    eor x9, x9, x12
+; CHECK-NEXT:    asr x11, x11, #63
+; CHECK-NEXT:    orr x8, x9, x8
+; CHECK-NEXT:    eor x9, x11, #0x7fffffffffffffff
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    csel x1, x9, x10, ne
+; CHECK-NEXT:    csinv x0, x13, x11, eq
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0