@@ -315,11 +315,10 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
315315define i32 @func4(i32 %x, i32 %y) nounwind {
316316; X64-LABEL: func4:
317317; X64: # %bb.0:
318- ; X64-NEXT: movl %edi, %ecx
319- ; X64-NEXT: imull %esi, %ecx
320318; X64-NEXT: xorl %eax, %eax
321- ; X64-NEXT: testl %ecx, %ecx
322- ; X64-NEXT: setns %al
319+ ; X64-NEXT: movl %edi, %ecx
320+ ; X64-NEXT: xorl %esi, %ecx
321+ ; X64-NEXT: sets %al
323322; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
324323; X64-NEXT: imull %esi, %edi
325324; X64-NEXT: cmovnol %edi, %eax
@@ -328,13 +327,12 @@ define i32 @func4(i32 %x, i32 %y) nounwind {
328327; X86-LABEL: func4:
329328; X86: # %bb.0:
330329; X86-NEXT: pushl %esi
331- ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
332330; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
333- ; X86-NEXT: movl %eax, %esi
334- ; X86-NEXT: imull %edx, %esi
331+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
335332; X86-NEXT: xorl %ecx, %ecx
336- ; X86-NEXT: testl %esi, %esi
337- ; X86-NEXT: setns %cl
333+ ; X86-NEXT: movl %eax, %esi
334+ ; X86-NEXT: xorl %edx, %esi
335+ ; X86-NEXT: sets %cl
338336; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
339337; X86-NEXT: imull %edx, %eax
340338; X86-NEXT: cmovol %ecx, %eax
@@ -347,11 +345,10 @@ define i32 @func4(i32 %x, i32 %y) nounwind {
347345define i64 @func5(i64 %x, i64 %y) {
348346; X64-LABEL: func5:
349347; X64: # %bb.0:
350- ; X64-NEXT: movq %rdi, %rax
351- ; X64-NEXT: imulq %rsi, %rax
352348; X64-NEXT: xorl %ecx, %ecx
353- ; X64-NEXT: testq %rax, %rax
354- ; X64-NEXT: setns %cl
349+ ; X64-NEXT: movq %rdi, %rax
350+ ; X64-NEXT: xorq %rsi, %rax
351+ ; X64-NEXT: sets %cl
355352; X64-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
356353; X64-NEXT: addq %rcx, %rax
357354; X64-NEXT: imulq %rsi, %rdi
@@ -360,47 +357,58 @@ define i64 @func5(i64 %x, i64 %y) {
360357;
361358; X86-LABEL: func5:
362359; X86: # %bb.0:
363- ; X86-NEXT: pushl %edi
360+ ; X86-NEXT: pushl %ebp
364361; X86-NEXT: .cfi_def_cfa_offset 8
365- ; X86-NEXT: pushl %esi
362+ ; X86-NEXT: pushl %ebx
366363; X86-NEXT: .cfi_def_cfa_offset 12
367- ; X86-NEXT: pushl %eax
364+ ; X86-NEXT: pushl %edi
368365; X86-NEXT: .cfi_def_cfa_offset 16
369- ; X86-NEXT: .cfi_offset %esi, -12
370- ; X86-NEXT: .cfi_offset %edi, -8
371- ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
372- ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
366+ ; X86-NEXT: pushl %esi
367+ ; X86-NEXT: .cfi_def_cfa_offset 20
368+ ; X86-NEXT: pushl %eax
369+ ; X86-NEXT: .cfi_def_cfa_offset 24
370+ ; X86-NEXT: .cfi_offset %esi, -20
371+ ; X86-NEXT: .cfi_offset %edi, -16
372+ ; X86-NEXT: .cfi_offset %ebx, -12
373+ ; X86-NEXT: .cfi_offset %ebp, -8
373374; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
374- ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
375+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
376+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
375377; X86-NEXT: movl $0, (%esp)
376378; X86-NEXT: movl %esp, %edi
379+ ; X86-NEXT: xorl %ebp, %ebp
380+ ; X86-NEXT: xorl %ebx, %ebx
381+ ; X86-NEXT: movl %eax, %esi
382+ ; X86-NEXT: xorl %ecx, %esi
383+ ; X86-NEXT: movl $-1, %esi
384+ ; X86-NEXT: cmovsl %ebp, %esi
385+ ; X86-NEXT: sets %bl
386+ ; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
377387; X86-NEXT: pushl %edi
378388; X86-NEXT: .cfi_adjust_cfa_offset 4
379- ; X86-NEXT: pushl %esi
389+ ; X86-NEXT: pushl %ecx
380390; X86-NEXT: .cfi_adjust_cfa_offset 4
381391; X86-NEXT: pushl %edx
382392; X86-NEXT: .cfi_adjust_cfa_offset 4
383- ; X86-NEXT: pushl %ecx
384- ; X86-NEXT: .cfi_adjust_cfa_offset 4
385393; X86-NEXT: pushl %eax
386394; X86-NEXT: .cfi_adjust_cfa_offset 4
395+ ; X86-NEXT: pushl {{[0-9]+}}(%esp)
396+ ; X86-NEXT: .cfi_adjust_cfa_offset 4
387397; X86-NEXT: calll __mulodi4
388398; X86-NEXT: addl $20, %esp
389399; X86-NEXT: .cfi_adjust_cfa_offset -20
390- ; X86-NEXT: xorl %ecx, %ecx
391- ; X86-NEXT: testl %edx, %edx
392- ; X86-NEXT: setns %cl
393- ; X86-NEXT: addl $2147483647, %ecx # imm = 0x7FFFFFFF
394- ; X86-NEXT: movl %edx, %esi
395- ; X86-NEXT: sarl $31, %esi
396400; X86-NEXT: cmpl $0, (%esp)
397401; X86-NEXT: cmovnel %esi, %eax
398- ; X86-NEXT: cmovnel %ecx, %edx
402+ ; X86-NEXT: cmovnel %ebx, %edx
399403; X86-NEXT: addl $4, %esp
400- ; X86-NEXT: .cfi_def_cfa_offset 12
404+ ; X86-NEXT: .cfi_def_cfa_offset 20
401405; X86-NEXT: popl %esi
402- ; X86-NEXT: .cfi_def_cfa_offset 8
406+ ; X86-NEXT: .cfi_def_cfa_offset 16
403407; X86-NEXT: popl %edi
408+ ; X86-NEXT: .cfi_def_cfa_offset 12
409+ ; X86-NEXT: popl %ebx
410+ ; X86-NEXT: .cfi_def_cfa_offset 8
411+ ; X86-NEXT: popl %ebp
404412; X86-NEXT: .cfi_def_cfa_offset 4
405413; X86-NEXT: retl
406414 %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 0)
@@ -414,36 +422,34 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
414422; X64-NEXT: shlb $4, %sil
415423; X64-NEXT: sarb $4, %sil
416424; X64-NEXT: shlb $4, %al
425+ ; X64-NEXT: xorl %ecx, %ecx
426+ ; X64-NEXT: movl %eax, %edx
427+ ; X64-NEXT: xorb %sil, %dl
428+ ; X64-NEXT: sets %cl
429+ ; X64-NEXT: addl $127, %ecx
417430; X64-NEXT: # kill: def $al killed $al killed $eax
418431; X64-NEXT: imulb %sil
419- ; X64-NEXT: seto %cl
420- ; X64-NEXT: xorl %edx, %edx
421- ; X64-NEXT: testb %al, %al
422- ; X64-NEXT: setns %dl
423- ; X64-NEXT: addl $127, %edx
424432; X64-NEXT: movzbl %al, %eax
425- ; X64-NEXT: testb %cl, %cl
426- ; X64-NEXT: cmovnel %edx, %eax
433+ ; X64-NEXT: cmovol %ecx, %eax
427434; X64-NEXT: sarb $4, %al
428435; X64-NEXT: # kill: def $al killed $al killed $eax
429436; X64-NEXT: retq
430437;
431438; X86-LABEL: func6:
432439; X86: # %bb.0:
433- ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
434- ; X86-NEXT: shlb $4, %cl
435- ; X86-NEXT: sarb $4, %cl
440+ ; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
441+ ; X86-NEXT: shlb $4, %dl
442+ ; X86-NEXT: sarb $4, %dl
436443; X86-NEXT: movb {{[0-9]+}}(%esp), %al
437444; X86-NEXT: shlb $4, %al
438- ; X86-NEXT: imulb %cl
439- ; X86-NEXT: seto %dl
440445; X86-NEXT: xorl %ecx, %ecx
441- ; X86-NEXT: testb %al, %al
442- ; X86-NEXT: setns %cl
446+ ; X86-NEXT: movb %al, %ah
447+ ; X86-NEXT: xorb %dl, %ah
448+ ; X86-NEXT: sets %cl
443449; X86-NEXT: addl $127, %ecx
450+ ; X86-NEXT: imulb %dl
444451; X86-NEXT: movzbl %al, %eax
445- ; X86-NEXT: testb %dl, %dl
446- ; X86-NEXT: cmovnel %ecx, %eax
452+ ; X86-NEXT: cmovol %ecx, %eax
447453; X86-NEXT: sarb $4, %al
448454; X86-NEXT: # kill: def $al killed $al killed $eax
449455; X86-NEXT: retl
@@ -454,59 +460,56 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
454460define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
455461; X64-LABEL: vec2:
456462; X64: # %bb.0:
457- ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
463+ ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
464+ ; X64-NEXT: movd %xmm2, %eax
465+ ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
458466; X64-NEXT: movd %xmm2, %ecx
459- ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
460- ; X64-NEXT: movd %xmm2, %r8d
461- ; X64-NEXT: movl %r8d, %edx
462- ; X64-NEXT: imull %ecx, %edx
463- ; X64-NEXT: xorl %esi, %esi
464- ; X64-NEXT: testl %edx, %edx
465- ; X64-NEXT: setns %sil
466- ; X64-NEXT: addl $2147483647, %esi # imm = 0x7FFFFFFF
467- ; X64-NEXT: imull %ecx, %r8d
468- ; X64-NEXT: cmovol %esi, %r8d
469- ; X64-NEXT: movd %xmm1, %edx
467+ ; X64-NEXT: xorl %edx, %edx
468+ ; X64-NEXT: movl %ecx, %esi
469+ ; X64-NEXT: xorl %eax, %esi
470+ ; X64-NEXT: sets %dl
471+ ; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
472+ ; X64-NEXT: imull %eax, %ecx
473+ ; X64-NEXT: cmovol %edx, %ecx
474+ ; X64-NEXT: movd %ecx, %xmm2
475+ ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
476+ ; X64-NEXT: movd %xmm3, %eax
477+ ; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
478+ ; X64-NEXT: movd %xmm3, %ecx
479+ ; X64-NEXT: xorl %edx, %edx
480+ ; X64-NEXT: movl %ecx, %esi
481+ ; X64-NEXT: xorl %eax, %esi
482+ ; X64-NEXT: sets %dl
483+ ; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
484+ ; X64-NEXT: imull %eax, %ecx
485+ ; X64-NEXT: cmovol %edx, %ecx
486+ ; X64-NEXT: movd %ecx, %xmm3
487+ ; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
488+ ; X64-NEXT: movd %xmm1, %eax
489+ ; X64-NEXT: movd %xmm0, %ecx
490+ ; X64-NEXT: xorl %edx, %edx
491+ ; X64-NEXT: movl %ecx, %esi
492+ ; X64-NEXT: xorl %eax, %esi
493+ ; X64-NEXT: sets %dl
494+ ; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
495+ ; X64-NEXT: imull %eax, %ecx
496+ ; X64-NEXT: cmovol %edx, %ecx
497+ ; X64-NEXT: movd %ecx, %xmm2
498+ ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
499+ ; X64-NEXT: movd %xmm1, %eax
500+ ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
470501; X64-NEXT: movd %xmm0, %ecx
502+ ; X64-NEXT: xorl %edx, %edx
471503; X64-NEXT: movl %ecx, %esi
472- ; X64-NEXT: imull %edx, %esi
473- ; X64-NEXT: xorl %edi, %edi
474- ; X64-NEXT: testl %esi, %esi
475- ; X64-NEXT: setns %dil
476- ; X64-NEXT: addl $2147483647, %edi # imm = 0x7FFFFFFF
477- ; X64-NEXT: imull %edx, %ecx
478- ; X64-NEXT: cmovol %edi, %ecx
479- ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
480- ; X64-NEXT: movd %xmm2, %edx
481- ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
482- ; X64-NEXT: movd %xmm2, %esi
483- ; X64-NEXT: movl %esi, %edi
484- ; X64-NEXT: imull %edx, %edi
485- ; X64-NEXT: xorl %eax, %eax
486- ; X64-NEXT: testl %edi, %edi
487- ; X64-NEXT: setns %al
488- ; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
489- ; X64-NEXT: imull %edx, %esi
490- ; X64-NEXT: cmovol %eax, %esi
491- ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
492- ; X64-NEXT: movd %xmm1, %r9d
493- ; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
494- ; X64-NEXT: movd %xmm0, %edx
495- ; X64-NEXT: movl %edx, %edi
496- ; X64-NEXT: imull %r9d, %edi
497- ; X64-NEXT: xorl %eax, %eax
498- ; X64-NEXT: testl %edi, %edi
499- ; X64-NEXT: setns %al
500- ; X64-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
501- ; X64-NEXT: imull %r9d, %edx
502- ; X64-NEXT: cmovol %eax, %edx
503- ; X64-NEXT: movd %edx, %xmm0
504- ; X64-NEXT: movd %esi, %xmm1
505- ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
504+ ; X64-NEXT: xorl %eax, %esi
505+ ; X64-NEXT: sets %dl
506+ ; X64-NEXT: addl $2147483647, %edx # imm = 0x7FFFFFFF
507+ ; X64-NEXT: imull %eax, %ecx
508+ ; X64-NEXT: cmovol %edx, %ecx
506509; X64-NEXT: movd %ecx, %xmm0
507- ; X64-NEXT: movd %r8d, %xmm2
508- ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
509- ; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
510+ ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
511+ ; X64-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
512+ ; X64-NEXT: movdqa %xmm2, %xmm0
510513; X64-NEXT: retq
511514;
512515; X86-LABEL: vec2:
@@ -515,51 +518,47 @@ define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
515518; X86-NEXT: pushl %ebx
516519; X86-NEXT: pushl %edi
517520; X86-NEXT: pushl %esi
521+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
522+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
523+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
524+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
518525; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
526+ ; X86-NEXT: xorl %ebx, %ebx
527+ ; X86-NEXT: movl %ecx, %edx
528+ ; X86-NEXT: xorl %edi, %edx
519529; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
520- ; X86-NEXT: movl %ecx, %esi
521- ; X86-NEXT: imull %edx, %esi
522- ; X86-NEXT: xorl %eax, %eax
523- ; X86-NEXT: testl %esi, %esi
524- ; X86-NEXT: setns %al
525- ; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
526- ; X86-NEXT: imull %edx, %ecx
527- ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
528- ; X86-NEXT: cmovol %eax, %ecx
529- ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
530+ ; X86-NEXT: sets %bl
531+ ; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
532+ ; X86-NEXT: imull %edi, %ecx
533+ ; X86-NEXT: cmovol %ebx, %ecx
534+ ; X86-NEXT: xorl %ebx, %ebx
530535; X86-NEXT: movl %edx, %edi
531- ; X86-NEXT: imull %esi, %edi
532- ; X86-NEXT: xorl %eax, %eax
533- ; X86-NEXT: testl %edi, %edi
534- ; X86-NEXT: setns %al
535- ; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
536- ; X86-NEXT: imull %esi, %edx
537- ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
538- ; X86-NEXT: cmovol %eax, %edx
539- ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
540- ; X86-NEXT: movl %esi, %ebx
541- ; X86-NEXT: imull %edi, %ebx
542- ; X86-NEXT: xorl %eax, %eax
543- ; X86-NEXT: testl %ebx, %ebx
544- ; X86-NEXT: setns %al
545- ; X86-NEXT: addl $2147483647, %eax # imm = 0x7FFFFFFF
546- ; X86-NEXT: imull %edi, %esi
536+ ; X86-NEXT: xorl %ebp, %edi
547537; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
548- ; X86-NEXT: cmovol %eax, %esi
549- ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
538+ ; X86-NEXT: sets %bl
539+ ; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
540+ ; X86-NEXT: imull %ebp, %edx
541+ ; X86-NEXT: cmovol %ebx, %edx
542+ ; X86-NEXT: xorl %ebx, %ebx
550543; X86-NEXT: movl %edi, %ebp
551- ; X86-NEXT: imull %eax, %ebp
544+ ; X86-NEXT: xorl %esi, %ebp
545+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
546+ ; X86-NEXT: sets %bl
547+ ; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
548+ ; X86-NEXT: imull %esi, %edi
549+ ; X86-NEXT: cmovol %ebx, %edi
552550; X86-NEXT: xorl %ebx, %ebx
553- ; X86-NEXT: testl %ebp, %ebp
554- ; X86-NEXT: setns %bl
551+ ; X86-NEXT: movl %ebp, %esi
552+ ; X86-NEXT: xorl %eax, %esi
553+ ; X86-NEXT: sets %bl
555554; X86-NEXT: addl $2147483647, %ebx # imm = 0x7FFFFFFF
556- ; X86-NEXT: imull %eax, %edi
555+ ; X86-NEXT: imull %eax, %ebp
556+ ; X86-NEXT: cmovol %ebx, %ebp
557557; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
558- ; X86-NEXT: cmovol %ebx, %edi
559- ; X86-NEXT: movl %ecx, 12(%eax)
560- ; X86-NEXT: movl %edx, 8(%eax)
561- ; X86-NEXT: movl %esi, 4(%eax)
562- ; X86-NEXT: movl %edi, (%eax)
558+ ; X86-NEXT: movl %ebp, 12(%eax)
559+ ; X86-NEXT: movl %edi, 8(%eax)
560+ ; X86-NEXT: movl %edx, 4(%eax)
561+ ; X86-NEXT: movl %ecx, (%eax)
563562; X86-NEXT: popl %esi
564563; X86-NEXT: popl %edi
565564; X86-NEXT: popl %ebx
0 commit comments