Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

failed to deduce type of memset #16

Closed
ZuseZ4 opened this issue Apr 4, 2023 · 4 comments · Fixed by EnzymeAD/Enzyme#1426
Closed

failed to deduce type of memset #16

ZuseZ4 opened this issue Apr 4, 2023 · 4 comments · Fixed by EnzymeAD/Enzyme#1426
Assignees

Comments

@ZuseZ4
Copy link
Member

ZuseZ4 commented Apr 4, 2023

error: <unknown>:0:0: in function preprocess__ZN5small8f_energy17h3bb4e9e5f821a19bE float ([12 x float]*, [51 x float]*): Enzyme: failed to deduce type of memset   call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(24) %5, i8 0, i64 24, i1 false) #66, !alias.scope !3, !noalias !6

Small example, after commit 467171d, updating Enzyme.

@ZuseZ4
Copy link
Member Author

ZuseZ4 commented Sep 15, 2023

After simplification:
; Reader's summary of the function below (derived from the IR itself):
;   %0  readonly input, 108 dereferenceable bytes, indexed as [27 x float].
;   %1  marked readnone and never referenced in the body.
;   %2  readonly input, 832 dereferenceable bytes, indexed as [208 x float].
;   Returns a float: the sum over k = 0..207 of %2[k] * %5[k], where %5 is
;   filled by the call to f_polynomials in block 56.
; Stages:
;   1. Zero the 36-float stack buffer %4 with llvm.memset (entry block).
;   2. Nested loops (blocks 11/16/24/30/7/8) store into %4 the square root of
;      the sum of squared differences between two float triples of %0.
;   3. Copy %4 into %6 and call f_polynomials(sret %5, %6) (block 56).
;   4. Accumulate the element-wise product of %2 and %5 (block 58) and return
;      it (block 57).
; Function Attrs: mustprogress noinline nonlazybind sanitize_hwaddress willreturn uwtable
define internal noundef float @preprocess__ZN8rust_msa8f_energy17ha2cc74f5e9434801E(ptr noalias nocapture noundef readonly align 4 dereferenceable(108) %0, ptr noalias nocapture readnone align 4 %1, ptr noalias nocapture noundef readonly align 4 dereferenceable(832) %2) unnamed_addr #123 {
  ; Stack buffers: %4 and %6 hold 36 floats (144 bytes) each, %5 holds 208 floats (832 bytes).
  %4 = alloca [36 x float], align 4
  %5 = alloca [208 x float], align 4
  %6 = alloca [36 x float], align 4
  call void @llvm.lifetime.start.p0(i64 144, ptr nonnull %6) #124
  tail call void @llvm.experimental.noalias.scope.decl(metadata !35484) #124
  call void @llvm.lifetime.start.p0(i64 144, ptr nonnull %4) #124, !noalias !35487
  ; Zero-fill all 144 bytes of %4. This is the memset call that the Enzyme
  ; error reported after this listing says it cannot deduce the type of.
  call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(144) %4, i8 0, i64 144, i1 false) #124, !noalias !35487
  br label %11

7:                                                ; preds = %30
  ; Inner-loop exit; falls through to the outer-loop latch.
  br label %8

8:                                                ; preds = %7, %11
  ; Outer-loop latch. %9 is the next write index into %4: unchanged (%13) when
  ; the inner loop was skipped, otherwise one past the last slot written (%53).
  %9 = phi i64 [ %13, %11 ], [ %53, %7 ]
  ; Leave the outer loop once the incremented counter %14 reaches 9.
  %10 = icmp eq i64 %14, 9
  br i1 %10, label %56, label %11

11:                                               ; preds = %8, %3
  ; Outer-loop header. %12 = outer counter i (starts at 0), %13 = running
  ; write index into %4 (starts at 0).
  %12 = phi i64 [ %14, %8 ], [ 0, %3 ]
  %13 = phi i64 [ %9, %8 ], [ 0, %3 ]
  %14 = add nuw nsw i64 %12, 1
  ; The inner loop only runs while i < 8; for i == 8 control goes straight to the latch.
  %15 = icmp ult i64 %12, 8
  br i1 %15, label %16, label %8

16:                                               ; preds = %11
  ; Inner-loop preheader: addresses of %0[3i], %0[3i+1], %0[3i+2].
  %17 = mul nuw nsw i64 %12, 3
  %18 = add nuw nsw i64 %17, 2
  %19 = add nuw nsw i64 %17, 1
  %20 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %19
  %21 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %18
  %22 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %17
  ; Only %0[3i] is loaded here; %0[3i+1] and %0[3i+2] are reloaded on every
  ; inner iteration in block 30.
  %23 = load float, ptr %22, align 4, !alias.scope !35489, !noalias !35492, !noundef !10
  br label %24

24:                                               ; preds = %30, %16
  ; Inner-loop header. %25 = inner counter j (starts at 0),
  ; %26 = write index into %4 for this iteration, %28 = i + j.
  %25 = phi i64 [ %27, %30 ], [ 0, %16 ]
  %26 = add i64 %13, %25
  %27 = add nuw nsw i64 %25, 1
  %28 = add i64 %12, %25
  ; Bounds check on the write index against the 36-element buffer; on failure
  ; branch to the panic block 55.
  %29 = icmp ult i64 %26, 36
  br i1 %29, label %30, label %55, !prof !42

30:                                               ; preds = %24
  ; Inner-loop body. %32 = 3 * (i + j + 1) is the base index of the second triple.
  %31 = add nuw nsw i64 %28, 1
  %32 = mul nuw nsw i64 %31, 3
  %33 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %32
  %34 = load float, ptr %33, align 4, !alias.scope !35489, !noalias !35492, !noundef !10
  ; (%0[3i] - %0[%32])^2
  %35 = fsub float %23, %34
  %36 = fmul float %35, %35
  %37 = load float, ptr %20, align 4, !alias.scope !35489, !noalias !35492, !noundef !10
  %38 = add nuw nsw i64 %32, 1
  %39 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %38
  %40 = load float, ptr %39, align 4, !alias.scope !35489, !noalias !35492, !noundef !10
  ; (%0[3i+1] - %0[%32+1])^2, added to the first squared difference.
  %41 = fsub float %37, %40
  %42 = fmul float %41, %41
  %43 = fadd float %36, %42
  %44 = load float, ptr %21, align 4, !alias.scope !35489, !noalias !35492, !noundef !10
  %45 = add nuw nsw i64 %32, 2
  %46 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %45
  %47 = load float, ptr %46, align 4, !alias.scope !35489, !noalias !35492, !noundef !10
  ; (%0[3i+2] - %0[%32+2])^2, completing the three-term sum of squares.
  %48 = fsub float %44, %47
  %49 = fmul float %48, %48
  %50 = fadd float %43, %49
  ; Square root of the sum of squares, stored as a float into %4[%26].
  %51 = tail call float @llvm.sqrt.f32(float %50) #124
  %52 = getelementptr inbounds [36 x float], ptr %4, i64 0, i64 %26
  store float %51, ptr %52, align 4, !noalias !35487
  ; %53 = next free slot in %4; carried to the outer loop through %9 in block 8.
  %53 = add nuw nsw i64 %26, 1
  ; Keep iterating while i + j < 7.
  %54 = icmp ult i64 %28, 7
  br i1 %54, label %24, label %7

55:                                               ; preds = %24
  ; Out-of-bounds write index: call panic_bounds_check(index %26, length 36, ...).
  ; The block ends in unreachable, so control does not continue past the call.
  tail call void @_ZN4core9panicking18panic_bounds_check17h813e7c5548f0e1edE(i64 noundef %26, i64 noundef 36, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @anon.e83e78a722cbecfa38fd7ba2e5793b11.2) #125, !noalias !35487
  unreachable

56:                                               ; preds = %8
  ; Copy the 144-byte buffer %4 into %6, end %4's lifetime, then call
  ; f_polynomials with %5 as the sret output ([208 x float]) and %6 as the
  ; readonly input.
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(144) %6, ptr noundef nonnull align 4 dereferenceable(144) %4, i64 144, i1 false) #124, !noalias !35484
  call void @llvm.lifetime.end.p0(i64 144, ptr nonnull %4) #124, !noalias !35487
  call void @llvm.lifetime.start.p0(i64 832, ptr nonnull %5) #124
  call void @_ZN8rust_msa19polynomials_ethanol13f_polynomials17h7d61e2b579238fb0E(ptr noalias nocapture noundef nonnull sret([208 x float]) dereferenceable(832) %5, ptr noalias noundef nonnull readonly align 4 dereferenceable(144) %6) #124
  br label %58

57:                                               ; preds = %58
  ; Exit block: end the lifetimes of %5 and %6 and return the accumulated sum %67.
  call void @llvm.lifetime.end.p0(i64 832, ptr nonnull %5) #124
  call void @llvm.lifetime.end.p0(i64 144, ptr nonnull %6) #124
  ret float %67

58:                                               ; preds = %58, %56
  ; Reduction loop. %59 = counter k (starts at 0), %60 = running sum (starts at 0.0).
  %59 = phi i64 [ %61, %58 ], [ 0, %56 ]
  %60 = phi float [ 0.000000e+00, %56 ], [ %67, %58 ]
  %61 = add nuw nsw i64 %59, 1
  %62 = getelementptr inbounds [208 x float], ptr %2, i64 0, i64 %59
  %63 = load float, ptr %62, align 4, !noundef !10
  %64 = getelementptr inbounds [208 x float], ptr %5, i64 0, i64 %59
  %65 = load float, ptr %64, align 4, !noundef !10
  ; sum += %2[k] * %5[k]
  %66 = fmul float %63, %65
  %67 = fadd float %60, %66
  ; Stop after the iteration with k == 207 (k + 1 == 208).
  %68 = icmp eq i64 %61, 208
  br i1 %68, label %57, label %58
}

error: <unknown>:0:0: in function preprocess__ZN8rust_msa8f_energy17ha2cc74f5e9434801E float (ptr, ptr, ptr): Enzyme: Cannot deduce type of memset   call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(144) %4, i8 0, i64 144, i1 false) #125, !noalias !7
<analysis>
  %4 = alloca [36 x float], align 4: {[-1]:Pointer}, intvals: {}
  %12 = phi i64 [ %14, %8 ], [ 0, %3 ]: {[-1]:Integer}, intvals: {0,}
  %61 = add nuw nsw i64 %59, 1: {[-1]:Integer}, intvals: {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,}
  %28 = add i64 %12, %25: {[-1]:Integer}, intvals: {0,}
  %26 = add i64 %13, %25: {[-1]:Integer}, intvals: {0,}
ptr %0: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Float@float, [-1,8]:Float@float, [-1,12]:Float@float, [-1,16]:Float@float, [-1,20]:Float@float, [-1,24]:Float@float, [-1,28]:Float@float, [-1,32]:Float@float, [-1,36]:Float@float, [-1,40]:Float@float, [-1,44]:Float@float, [-1,48]:Float@float, [-1,52]:Float@float, [-1,56]:Float@float, [-1,60]:Float@float, [-1,64]:Float@float, [-1,68]:Float@float, [-1,72]:Float@float, [-1,76]:Float@float, [-1,80]:Float@float, [-1,84]:Float@float, [-1,88]:Float@float, [-1,92]:Float@float, [-1,96]:Float@float, [-1,100]:Float@float, [-1,104]:Float@float}, intvals: {}
ptr %1: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Float@float, [-1,8]:Float@float, [-1,12]:Float@float, [-1,16]:Float@float, [-1,20]:Float@float, [-1,24]:Float@float, [-1,28]:Float@float, [-1,32]:Float@float, [-1,36]:Float@float, [-1,40]:Float@float, [-1,44]:Float@float, [-1,48]:Float@float, [-1,52]:Float@float, [-1,56]:Float@float, [-1,60]:Float@float, [-1,64]:Float@float, [-1,68]:Float@float, [-1,72]:Float@float, [-1,76]:Float@float, [-1,80]:Float@float, [-1,84]:Float@float, [-1,88]:Float@float, [-1,92]:Float@float, [-1,96]:Float@float, [-1,100]:Float@float, [-1,104]:Float@float, [-1,108]:Float@float, [-1,112]:Float@float, [-1,116]:Float@float, [-1,120]:Float@float, [-1,124]:Float@float, [-1,128]:Float@float, [-1,132]:Float@float, [-1,136]:Float@float, [-1,140]:Float@float, [-1,144]:Float@float, [-1,148]:Float@float, [-1,152]:Float@float, [-1,156]:Float@float, [-1,160]:Float@float, [-1,164]:Float@float, [-1,168]:Float@float, [-1,172]:Float@float, [-1,176]:Float@float, [-1,180]:Float@float, [-1,184]:Float@float, [-1,188]:Float@float, [-1,192]:Float@float, [-1,196]:Float@float, [-1,200]:Float@float, [-1,204]:Float@float, [-1,208]:Float@float, [-1,212]:Float@float, [-1,216]:Float@float, [-1,220]:Float@float, [-1,224]:Float@float, [-1,228]:Float@float, [-1,232]:Float@float, [-1,236]:Float@float, [-1,240]:Float@float, [-1,244]:Float@float, [-1,248]:Float@float, [-1,252]:Float@float, [-1,256]:Float@float, [-1,260]:Float@float, [-1,264]:Float@float, [-1,268]:Float@float, [-1,272]:Float@float, [-1,276]:Float@float, [-1,280]:Float@float, [-1,284]:Float@float, [-1,288]:Float@float, [-1,292]:Float@float, [-1,296]:Float@float, [-1,300]:Float@float, [-1,304]:Float@float, [-1,308]:Float@float, [-1,312]:Float@float, [-1,316]:Float@float, [-1,320]:Float@float, [-1,324]:Float@float, [-1,328]:Float@float, [-1,332]:Float@float, [-1,336]:Float@float, [-1,340]:Float@float, [-1,344]:Float@float, [-1,348]:Float@float, [-1,352]:Float@float, [-1,356]:Float@float, [-1,360]:Float@float, 
[-1,364]:Float@float, [-1,368]:Float@float, [-1,372]:Float@float, [-1,376]:Float@float, [-1,380]:Float@float, [-1,384]:Float@float, [-1,388]:Float@float, [-1,392]:Float@float, [-1,396]:Float@float, [-1,400]:Float@float, [-1,404]:Float@float, [-1,408]:Float@float, [-1,412]:Float@float, [-1,416]:Float@float, [-1,420]:Float@float, [-1,424]:Float@float, [-1,428]:Float@float, [-1,432]:Float@float, [-1,436]:Float@float, [-1,440]:Float@float, [-1,444]:Float@float, [-1,448]:Float@float, [-1,452]:Float@float, [-1,456]:Float@float, [-1,460]:Float@float, [-1,464]:Float@float, [-1,468]:Float@float, [-1,472]:Float@float, [-1,476]:Float@float, [-1,480]:Float@float, [-1,484]:Float@float, [-1,488]:Float@float, [-1,492]:Float@float, [-1,496]:Float@float, [-1,500]:Float@float}, intvals: {}
ptr %2: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Float@float, [-1,8]:Float@float, [-1,12]:Float@float, [-1,16]:Float@float, [-1,20]:Float@float, [-1,24]:Float@float, [-1,28]:Float@float, [-1,32]:Float@float, [-1,36]:Float@float, [-1,40]:Float@float, [-1,44]:Float@float, [-1,48]:Float@float, [-1,52]:Float@float, [-1,56]:Float@float, [-1,60]:Float@float, [-1,64]:Float@float, [-1,68]:Float@float, [-1,72]:Float@float, [-1,76]:Float@float, [-1,80]:Float@float, [-1,84]:Float@float, [-1,88]:Float@float, [-1,92]:Float@float, [-1,96]:Float@float, [-1,100]:Float@float, [-1,104]:Float@float, [-1,108]:Float@float, [-1,112]:Float@float, [-1,116]:Float@float, [-1,120]:Float@float, [-1,124]:Float@float, [-1,128]:Float@float, [-1,132]:Float@float, [-1,136]:Float@float, [-1,140]:Float@float, [-1,144]:Float@float, [-1,148]:Float@float, [-1,152]:Float@float, [-1,156]:Float@float, [-1,160]:Float@float, [-1,164]:Float@float, [-1,168]:Float@float, [-1,172]:Float@float, [-1,176]:Float@float, [-1,180]:Float@float, [-1,184]:Float@float, [-1,188]:Float@float, [-1,192]:Float@float, [-1,196]:Float@float, [-1,200]:Float@float, [-1,204]:Float@float, [-1,208]:Float@float, [-1,212]:Float@float, [-1,216]:Float@float, [-1,220]:Float@float, [-1,224]:Float@float, [-1,228]:Float@float, [-1,232]:Float@float, [-1,236]:Float@float, [-1,240]:Float@float, [-1,244]:Float@float, [-1,248]:Float@float, [-1,252]:Float@float, [-1,256]:Float@float, [-1,260]:Float@float, [-1,264]:Float@float, [-1,268]:Float@float, [-1,272]:Float@float, [-1,276]:Float@float, [-1,280]:Float@float, [-1,284]:Float@float, [-1,288]:Float@float, [-1,292]:Float@float, [-1,296]:Float@float, [-1,300]:Float@float, [-1,304]:Float@float, [-1,308]:Float@float, [-1,312]:Float@float, [-1,316]:Float@float, [-1,320]:Float@float, [-1,324]:Float@float, [-1,328]:Float@float, [-1,332]:Float@float, [-1,336]:Float@float, [-1,340]:Float@float, [-1,344]:Float@float, [-1,348]:Float@float, [-1,352]:Float@float, [-1,356]:Float@float, [-1,360]:Float@float, 
[-1,364]:Float@float, [-1,368]:Float@float, [-1,372]:Float@float, [-1,376]:Float@float, [-1,380]:Float@float, [-1,384]:Float@float, [-1,388]:Float@float, [-1,392]:Float@float, [-1,396]:Float@float, [-1,400]:Float@float, [-1,404]:Float@float, [-1,408]:Float@float, [-1,412]:Float@float, [-1,416]:Float@float, [-1,420]:Float@float, [-1,424]:Float@float, [-1,428]:Float@float, [-1,432]:Float@float, [-1,436]:Float@float, [-1,440]:Float@float, [-1,444]:Float@float, [-1,448]:Float@float, [-1,452]:Float@float, [-1,456]:Float@float, [-1,460]:Float@float, [-1,464]:Float@float, [-1,468]:Float@float, [-1,472]:Float@float, [-1,476]:Float@float, [-1,480]:Float@float, [-1,484]:Float@float, [-1,488]:Float@float, [-1,492]:Float@float, [-1,496]:Float@float, [-1,500]:Float@float}, intvals: {}
  %51 = tail call float @llvm.sqrt.f32(float %50) #125: {[-1]:Float@float}, intvals: {}
  %52 = getelementptr inbounds [36 x float], ptr %4, i64 0, i64 %26: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %53 = add nuw nsw i64 %26, 1: {[-1]:Integer}, intvals: {1,}
  %54 = icmp ult i64 %28, 7: {[-1]:Integer}, intvals: {}
  call void @_ZN8rust_msa19polynomials_ethanol13f_polynomials17h7d61e2b579238fb0E(ptr noalias nocapture noundef nonnull sret([208 x float]) dereferenceable(832) %5, ptr noalias noundef nonnull readonly align 4 dereferenceable(144) %6) #125: {}, intvals: {}
  %60 = phi float [ 0.000000e+00, %56 ], [ %67, %58 ]: {[-1]:Float@float}, intvals: {}
  %62 = getelementptr inbounds [208 x float], ptr %2, i64 0, i64 %59: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %63 = load float, ptr %62, align 4, !noundef !13: {[-1]:Float@float}, intvals: {}
  %64 = getelementptr inbounds [208 x float], ptr %5, i64 0, i64 %59: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %65 = load float, ptr %64, align 4, !noundef !13: {[-1]:Float@float}, intvals: {}
  %66 = fmul float %63, %65: {[-1]:Float@float}, intvals: {}
  %67 = fadd float %60, %66: {[-1]:Float@float}, intvals: {}
  %68 = icmp eq i64 %61, 208: {[-1]:Integer}, intvals: {}
  %14 = add nuw nsw i64 %12, 1: {[-1]:Integer}, intvals: {1,}
  %59 = phi i64 [ %61, %58 ], [ 0, %56 ]: {[-1]:Integer}, intvals: {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,}
  %5 = alloca [208 x float], align 4: {[-1]:Pointer}, intvals: {}
  %6 = alloca [36 x float], align 4: {[-1]:Pointer}, intvals: {}
  %9 = phi i64 [ %13, %11 ], [ %53, %7 ]: {[-1]:Integer}, intvals: {0,1,}
  %10 = icmp eq i64 %14, 9: {[-1]:Integer}, intvals: {}
  %13 = phi i64 [ %9, %8 ], [ 0, %3 ]: {[-1]:Integer}, intvals: {0,}
  %25 = phi i64 [ %27, %30 ], [ 0, %16 ]: {[-1]:Integer}, intvals: {0,}
  %27 = add nuw nsw i64 %25, 1: {[-1]:Integer}, intvals: {1,}
  %15 = icmp ult i64 %12, 8: {[-1]:Integer}, intvals: {}
  %17 = mul nuw nsw i64 %12, 3: {[-1]:Integer}, intvals: {0,}
  %18 = add nuw nsw i64 %17, 2: {[-1]:Integer}, intvals: {2,}
  %19 = add nuw nsw i64 %17, 1: {[-1]:Integer}, intvals: {1,}
  %20 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %19: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %21 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %18: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %22 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %17: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %23 = load float, ptr %22, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %29 = icmp ult i64 %26, 36: {[-1]:Integer}, intvals: {}
  %31 = add nuw nsw i64 %28, 1: {[-1]:Integer}, intvals: {1,}
  %32 = mul nuw nsw i64 %31, 3: {[-1]:Integer}, intvals: {3,}
  %33 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %32: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %34 = load float, ptr %33, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %35 = fsub float %23, %34: {[-1]:Float@float}, intvals: {}
  %36 = fmul float %35, %35: {[-1]:Float@float}, intvals: {}
  %37 = load float, ptr %20, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %38 = add nuw nsw i64 %32, 1: {[-1]:Integer}, intvals: {4,}
  %39 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %38: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %40 = load float, ptr %39, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %41 = fsub float %37, %40: {[-1]:Float@float}, intvals: {}
  %42 = fmul float %41, %41: {[-1]:Float@float}, intvals: {}
  %43 = fadd float %36, %42: {[-1]:Float@float}, intvals: {}
  %44 = load float, ptr %21, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %45 = add nuw nsw i64 %32, 2: {[-1]:Integer}, intvals: {5,}
  %46 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %45: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %47 = load float, ptr %46, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %48 = fsub float %44, %47: {[-1]:Float@float}, intvals: {}
  %49 = fmul float %48, %48: {[-1]:Float@float}, intvals: {}
  %50 = fadd float %43, %49: {[-1]:Float@float}, intvals: {}
i64 7: {[-1]:Integer}, intvals: {7,}
i64 9: {[-1]:Integer}, intvals: {9,}
i64 36: {[-1]:Integer}, intvals: {36,}
float 0.000000e+00: {[-1]:Anything}, intvals: {}
i64 208: {[-1]:Integer}, intvals: {208,}
i64 2: {[-1]:Integer}, intvals: {2,}
i64 0: {[-1]:Anything}, intvals: {0,}
i64 3: {[-1]:Integer}, intvals: {3,}
i64 1: {[-1]:Integer}, intvals: {1,}
i64 8: {[-1]:Integer}, intvals: {8,}
</analysis>

@ZuseZ4
Copy link
Member Author

ZuseZ4 commented Sep 17, 2023

@wsmoses I updated the submodule. Unfortunately, it is still failing on the latest Enzyme.

@ZuseZ4
Copy link
Member Author

ZuseZ4 commented Sep 17, 2023

warning: `rust_msa` (lib) generated 1 warning
   Compiling rust_msa v0.1.0 (/h/344/drehwald/prog/msa_code)
error: <unknown>:0:0: in function preprocess__ZN8rust_msa8f_energy17h95acfd8e66f79358E float (ptr, ptr, ptr): Enzyme: Cannot deduce type of memset   call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(144) %4, i8 0, i64 144, i1 false) #125, !noalias !7
<analysis>
  %4 = alloca [36 x float], align 4: {[-1]:Pointer}, intvals: {}
  %5 = alloca [208 x float], align 4: {[-1]:Pointer}, intvals: {}
ptr %0: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Float@float, [-1,8]:Float@float, [-1,12]:Float@float, [-1,16]:Float@float, [-1,20]:Float@float, [-1,24]:Float@float, [-1,28]:Float@float, [-1,32]:Float@float, [-1,36]:Float@float, [-1,40]:Float@float, [-1,44]:Float@float, [-1,48]:Float@float, [-1,52]:Float@float, [-1,56]:Float@float, [-1,60]:Float@float, [-1,64]:Float@float, [-1,68]:Float@float, [-1,72]:Float@float, [-1,76]:Float@float, [-1,80]:Float@float, [-1,84]:Float@float, [-1,88]:Float@float, [-1,92]:Float@float, [-1,96]:Float@float, [-1,100]:Float@float, [-1,104]:Float@float}, intvals: {}
ptr %1: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Float@float, [-1,8]:Float@float, [-1,12]:Float@float, [-1,16]:Float@float, [-1,20]:Float@float, [-1,24]:Float@float, [-1,28]:Float@float, [-1,32]:Float@float, [-1,36]:Float@float, [-1,40]:Float@float, [-1,44]:Float@float, [-1,48]:Float@float, [-1,52]:Float@float, [-1,56]:Float@float, [-1,60]:Float@float, [-1,64]:Float@float, [-1,68]:Float@float, [-1,72]:Float@float, [-1,76]:Float@float, [-1,80]:Float@float, [-1,84]:Float@float, [-1,88]:Float@float, [-1,92]:Float@float, [-1,96]:Float@float, [-1,100]:Float@float, [-1,104]:Float@float, [-1,108]:Float@float, [-1,112]:Float@float, [-1,116]:Float@float, [-1,120]:Float@float, [-1,124]:Float@float, [-1,128]:Float@float, [-1,132]:Float@float, [-1,136]:Float@float, [-1,140]:Float@float, [-1,144]:Float@float, [-1,148]:Float@float, [-1,152]:Float@float, [-1,156]:Float@float, [-1,160]:Float@float, [-1,164]:Float@float, [-1,168]:Float@float, [-1,172]:Float@float, [-1,176]:Float@float, [-1,180]:Float@float, [-1,184]:Float@float, [-1,188]:Float@float, [-1,192]:Float@float, [-1,196]:Float@float, [-1,200]:Float@float, [-1,204]:Float@float, [-1,208]:Float@float, [-1,212]:Float@float, [-1,216]:Float@float, [-1,220]:Float@float, [-1,224]:Float@float, [-1,228]:Float@float, [-1,232]:Float@float, [-1,236]:Float@float, [-1,240]:Float@float, [-1,244]:Float@float, [-1,248]:Float@float, [-1,252]:Float@float, [-1,256]:Float@float, [-1,260]:Float@float, [-1,264]:Float@float, [-1,268]:Float@float, [-1,272]:Float@float, [-1,276]:Float@float, [-1,280]:Float@float, [-1,284]:Float@float, [-1,288]:Float@float, [-1,292]:Float@float, [-1,296]:Float@float, [-1,300]:Float@float, [-1,304]:Float@float, [-1,308]:Float@float, [-1,312]:Float@float, [-1,316]:Float@float, [-1,320]:Float@float, [-1,324]:Float@float, [-1,328]:Float@float, [-1,332]:Float@float, [-1,336]:Float@float, [-1,340]:Float@float, [-1,344]:Float@float, [-1,348]:Float@float, [-1,352]:Float@float, [-1,356]:Float@float, [-1,360]:Float@float, 
[-1,364]:Float@float, [-1,368]:Float@float, [-1,372]:Float@float, [-1,376]:Float@float, [-1,380]:Float@float, [-1,384]:Float@float, [-1,388]:Float@float, [-1,392]:Float@float, [-1,396]:Float@float, [-1,400]:Float@float, [-1,404]:Float@float, [-1,408]:Float@float, [-1,412]:Float@float, [-1,416]:Float@float, [-1,420]:Float@float, [-1,424]:Float@float, [-1,428]:Float@float, [-1,432]:Float@float, [-1,436]:Float@float, [-1,440]:Float@float, [-1,444]:Float@float, [-1,448]:Float@float, [-1,452]:Float@float, [-1,456]:Float@float, [-1,460]:Float@float, [-1,464]:Float@float, [-1,468]:Float@float, [-1,472]:Float@float, [-1,476]:Float@float, [-1,480]:Float@float, [-1,484]:Float@float, [-1,488]:Float@float, [-1,492]:Float@float, [-1,496]:Float@float, [-1,500]:Float@float}, intvals: {}
ptr %2: {[-1]:Pointer, [-1,0]:Float@float, [-1,4]:Float@float, [-1,8]:Float@float, [-1,12]:Float@float, [-1,16]:Float@float, [-1,20]:Float@float, [-1,24]:Float@float, [-1,28]:Float@float, [-1,32]:Float@float, [-1,36]:Float@float, [-1,40]:Float@float, [-1,44]:Float@float, [-1,48]:Float@float, [-1,52]:Float@float, [-1,56]:Float@float, [-1,60]:Float@float, [-1,64]:Float@float, [-1,68]:Float@float, [-1,72]:Float@float, [-1,76]:Float@float, [-1,80]:Float@float, [-1,84]:Float@float, [-1,88]:Float@float, [-1,92]:Float@float, [-1,96]:Float@float, [-1,100]:Float@float, [-1,104]:Float@float, [-1,108]:Float@float, [-1,112]:Float@float, [-1,116]:Float@float, [-1,120]:Float@float, [-1,124]:Float@float, [-1,128]:Float@float, [-1,132]:Float@float, [-1,136]:Float@float, [-1,140]:Float@float, [-1,144]:Float@float, [-1,148]:Float@float, [-1,152]:Float@float, [-1,156]:Float@float, [-1,160]:Float@float, [-1,164]:Float@float, [-1,168]:Float@float, [-1,172]:Float@float, [-1,176]:Float@float, [-1,180]:Float@float, [-1,184]:Float@float, [-1,188]:Float@float, [-1,192]:Float@float, [-1,196]:Float@float, [-1,200]:Float@float, [-1,204]:Float@float, [-1,208]:Float@float, [-1,212]:Float@float, [-1,216]:Float@float, [-1,220]:Float@float, [-1,224]:Float@float, [-1,228]:Float@float, [-1,232]:Float@float, [-1,236]:Float@float, [-1,240]:Float@float, [-1,244]:Float@float, [-1,248]:Float@float, [-1,252]:Float@float, [-1,256]:Float@float, [-1,260]:Float@float, [-1,264]:Float@float, [-1,268]:Float@float, [-1,272]:Float@float, [-1,276]:Float@float, [-1,280]:Float@float, [-1,284]:Float@float, [-1,288]:Float@float, [-1,292]:Float@float, [-1,296]:Float@float, [-1,300]:Float@float, [-1,304]:Float@float, [-1,308]:Float@float, [-1,312]:Float@float, [-1,316]:Float@float, [-1,320]:Float@float, [-1,324]:Float@float, [-1,328]:Float@float, [-1,332]:Float@float, [-1,336]:Float@float, [-1,340]:Float@float, [-1,344]:Float@float, [-1,348]:Float@float, [-1,352]:Float@float, [-1,356]:Float@float, [-1,360]:Float@float, 
[-1,364]:Float@float, [-1,368]:Float@float, [-1,372]:Float@float, [-1,376]:Float@float, [-1,380]:Float@float, [-1,384]:Float@float, [-1,388]:Float@float, [-1,392]:Float@float, [-1,396]:Float@float, [-1,400]:Float@float, [-1,404]:Float@float, [-1,408]:Float@float, [-1,412]:Float@float, [-1,416]:Float@float, [-1,420]:Float@float, [-1,424]:Float@float, [-1,428]:Float@float, [-1,432]:Float@float, [-1,436]:Float@float, [-1,440]:Float@float, [-1,444]:Float@float, [-1,448]:Float@float, [-1,452]:Float@float, [-1,456]:Float@float, [-1,460]:Float@float, [-1,464]:Float@float, [-1,468]:Float@float, [-1,472]:Float@float, [-1,476]:Float@float, [-1,480]:Float@float, [-1,484]:Float@float, [-1,488]:Float@float, [-1,492]:Float@float, [-1,496]:Float@float, [-1,500]:Float@float}, intvals: {}
  %6 = alloca [36 x float], align 4: {[-1]:Pointer}, intvals: {}
  %9 = phi i64 [ %13, %11 ], [ %53, %7 ]: {[-1]:Integer}, intvals: {0,1,}
  %10 = icmp eq i64 %14, 9: {[-1]:Integer}, intvals: {}
  %13 = phi i64 [ %9, %8 ], [ 0, %3 ]: {[-1]:Integer}, intvals: {0,}
  %25 = phi i64 [ %27, %30 ], [ 0, %16 ]: {[-1]:Integer}, intvals: {0,}
  %27 = add nuw nsw i64 %25, 1: {[-1]:Integer}, intvals: {1,}
  %15 = icmp ult i64 %12, 8: {[-1]:Integer}, intvals: {}
  %17 = mul nuw nsw i64 %12, 3: {[-1]:Integer}, intvals: {0,}
  %18 = add nuw nsw i64 %17, 2: {[-1]:Integer}, intvals: {2,}
  %19 = add nuw nsw i64 %17, 1: {[-1]:Integer}, intvals: {1,}
  %20 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %19: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %21 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %18: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %22 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %17: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %23 = load float, ptr %22, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %29 = icmp ult i64 %26, 36: {[-1]:Integer}, intvals: {}
  %31 = add nuw nsw i64 %28, 1: {[-1]:Integer}, intvals: {1,}
  %32 = mul nuw nsw i64 %31, 3: {[-1]:Integer}, intvals: {3,}
  %33 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %32: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %34 = load float, ptr %33, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %35 = fsub float %23, %34: {[-1]:Float@float}, intvals: {}
  %36 = fmul float %35, %35: {[-1]:Float@float}, intvals: {}
  %37 = load float, ptr %20, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %38 = add nuw nsw i64 %32, 1: {[-1]:Integer}, intvals: {4,}
  %39 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %38: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %40 = load float, ptr %39, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %41 = fsub float %37, %40: {[-1]:Float@float}, intvals: {}
  %42 = fmul float %41, %41: {[-1]:Float@float}, intvals: {}
  %43 = fadd float %36, %42: {[-1]:Float@float}, intvals: {}
  %44 = load float, ptr %21, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %45 = add nuw nsw i64 %32, 2: {[-1]:Integer}, intvals: {5,}
  %46 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %45: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %47 = load float, ptr %46, align 4, !alias.scope !9, !noalias !12, !noundef !13: {[-1]:Float@float}, intvals: {}
  %48 = fsub float %44, %47: {[-1]:Float@float}, intvals: {}
  %12 = phi i64 [ %14, %8 ], [ 0, %3 ]: {[-1]:Integer}, intvals: {0,}
  %61 = add nuw nsw i64 %59, 1: {[-1]:Integer}, intvals: {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,}
  %28 = add i64 %12, %25: {[-1]:Integer}, intvals: {0,}
  %26 = add i64 %13, %25: {[-1]:Integer}, intvals: {0,}
  %49 = fmul float %48, %48: {[-1]:Float@float}, intvals: {}
  %50 = fadd float %43, %49: {[-1]:Float@float}, intvals: {}
  %51 = tail call float @llvm.sqrt.f32(float %50) #125: {[-1]:Float@float}, intvals: {}
  %52 = getelementptr inbounds [36 x float], ptr %4, i64 0, i64 %26: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %53 = add nuw nsw i64 %26, 1: {[-1]:Integer}, intvals: {1,}
  %54 = icmp ult i64 %28, 7: {[-1]:Integer}, intvals: {}
  call void @_ZN8rust_msa19polynomials_ethanol13f_polynomials17hc8d71dbfb7845995E(ptr noalias nocapture noundef nonnull sret([208 x float]) dereferenceable(832) %5, ptr noalias noundef nonnull readonly align 4 dereferenceable(144) %6) #125: {}, intvals: {}
  %60 = phi float [ 0.000000e+00, %56 ], [ %67, %58 ]: {[-1]:Float@float}, intvals: {}
  %62 = getelementptr inbounds [208 x float], ptr %2, i64 0, i64 %59: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %63 = load float, ptr %62, align 4, !noundef !13: {[-1]:Float@float}, intvals: {}
  %64 = getelementptr inbounds [208 x float], ptr %5, i64 0, i64 %59: {[-1]:Pointer, [-1,0]:Float@float}, intvals: {}
  %65 = load float, ptr %64, align 4, !noundef !13: {[-1]:Float@float}, intvals: {}
  %66 = fmul float %63, %65: {[-1]:Float@float}, intvals: {}
  %67 = fadd float %60, %66: {[-1]:Float@float}, intvals: {}
  %68 = icmp eq i64 %61, 208: {[-1]:Integer}, intvals: {}
  %14 = add nuw nsw i64 %12, 1: {[-1]:Integer}, intvals: {1,}
  %59 = phi i64 [ %61, %58 ], [ 0, %56 ]: {[-1]:Integer}, intvals: {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,}
i64 7: {[-1]:Integer}, intvals: {7,}
i64 9: {[-1]:Integer}, intvals: {9,}
i64 36: {[-1]:Integer}, intvals: {36,}
float 0.000000e+00: {[-1]:Anything}, intvals: {}
i64 208: {[-1]:Integer}, intvals: {208,}
i64 8: {[-1]:Integer}, intvals: {8,}
i64 1: {[-1]:Integer}, intvals: {1,}
i64 2: {[-1]:Integer}, intvals: {2,}
i64 3: {[-1]:Integer}, intvals: {3,}
i64 0: {[-1]:Anything}, intvals: {0,}
</analysis>


error: could not compile `rust_msa` (bin "rust_msa")

@ZuseZ4
Copy link
Member Author

ZuseZ4 commented Sep 18, 2023

; Preprocessed IR for rust_msa::f_energy, as pasted into the issue.
;
; Arguments:
;   %0 - readonly pointer, dereferenceable(108); indexed below as [27 x float].
;   %1 - readnone pointer; never referenced in the body.
;   %2 - readonly pointer, dereferenceable(832); indexed below as [208 x float].
; Returns: the float accumulated in the final loop (block 58), i.e. the sum
; over k in [0, 208) of %2[k] * %5[k].
;
; Overall shape:
;   1. Zero a [36 x float] scratch buffer (%4) with memset.
;   2. Nested loops (blocks 11/16/24/30) fill %4 with
;      sqrt(dx*dx + dy*dy + dz*dz) for pairs of float triples read from %0.
;   3. Copy %4 into %6 and call f_polynomials, which writes [208 x float]
;      into %5 through its sret argument.
;   4. Reduce %2 and %5 element-wise into a single float and return it.
define internal noundef float @preprocess__ZN8rust_msa8f_energy17h95acfd8e66f79358E(ptr noalias nocapture noundef readonly align 4 dereferenceable(108) %0, ptr noalias nocapture readnone align 4 %1, ptr noalias nocapture noundef readonly align 4 dereferenceable(832) %2) unnamed_addr #123 {
  ; %4: scratch buffer filled by the nested loops; %5: output of f_polynomials;
  ; %6: copy of %4 that is passed to f_polynomials.
  %4 = alloca [36 x float], align 4
  %5 = alloca [208 x float], align 4
  %6 = alloca [36 x float], align 4
  call void @llvm.lifetime.start.p0(i64 144, ptr nonnull %6) #124
  tail call void @llvm.experimental.noalias.scope.decl(metadata !35470) #124
  call void @llvm.lifetime.start.p0(i64 144, ptr nonnull %4) #124, !noalias !35473
  ; Zero-initialise all 144 bytes (36 floats) of %4.
  ; NOTE(review): presumably this is the memset Enzyme reports as
  ; "failed to deduce type of memset" (issue title) - confirm against the log.
  call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(144) %4, i8 0, i64 144, i1 false) #124, !noalias !35473
  br label %11

7:                                                ; preds = %30
  br label %8

; Outer-loop latch: %9 is the next write index into %4 (unchanged when the
; inner loop was skipped, %53 when it ran). Exit to block 56 once %14 == 9.
8:                                                ; preds = %7, %11
  %9 = phi i64 [ %13, %11 ], [ %53, %7 ]
  %10 = icmp eq i64 %14, 9
  br i1 %10, label %56, label %11

; Outer-loop header: %12 is the outer counter (starts at 0), %13 the running
; write index into %4. The inner loop is entered only while %12 < 8.
11:                                               ; preds = %8, %3
  %12 = phi i64 [ %14, %8 ], [ 0, %3 ]
  %13 = phi i64 [ %9, %8 ], [ 0, %3 ]
  %14 = add nuw nsw i64 %12, 1
  %15 = icmp ult i64 %12, 8
  br i1 %15, label %16, label %8

; Inner-loop preheader: compute addresses of the three consecutive floats at
; %0[3*%12 .. 3*%12+2]. Only the first is loaded here (%23); the other two
; are loaded inside the inner loop body (%37, %44).
16:                                               ; preds = %11
  %17 = mul nuw nsw i64 %12, 3
  %18 = add nuw nsw i64 %17, 2
  %19 = add nuw nsw i64 %17, 1
  %20 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %19
  %21 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %18
  %22 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %17
  %23 = load float, ptr %22, align 4, !alias.scope !35475, !noalias !35478, !noundef !10
  br label %24

; Inner-loop header: %25 is the inner counter, %26 = %13 + %25 is the write
; index into %4, %28 = %12 + %25. Branch to the panic block if %26 >= 36.
24:                                               ; preds = %30, %16
  %25 = phi i64 [ %27, %30 ], [ 0, %16 ]
  %26 = add i64 %13, %25
  %27 = add nuw nsw i64 %25, 1
  %28 = add i64 %12, %25
  %29 = icmp ult i64 %26, 36
  br i1 %29, label %30, label %55, !prof !42

; Inner-loop body: the second triple starts at %0[3*(%28+1)]. Compute the
; component-wise differences against the first triple, square and sum them,
; take the square root, and store the result at %4[%26].
30:                                               ; preds = %24
  %31 = add nuw nsw i64 %28, 1
  %32 = mul nuw nsw i64 %31, 3
  %33 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %32
  %34 = load float, ptr %33, align 4, !alias.scope !35475, !noalias !35478, !noundef !10
  %35 = fsub float %23, %34
  %36 = fmul float %35, %35
  %37 = load float, ptr %20, align 4, !alias.scope !35475, !noalias !35478, !noundef !10
  %38 = add nuw nsw i64 %32, 1
  %39 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %38
  %40 = load float, ptr %39, align 4, !alias.scope !35475, !noalias !35478, !noundef !10
  %41 = fsub float %37, %40
  %42 = fmul float %41, %41
  %43 = fadd float %36, %42
  %44 = load float, ptr %21, align 4, !alias.scope !35475, !noalias !35478, !noundef !10
  %45 = add nuw nsw i64 %32, 2
  %46 = getelementptr inbounds [27 x float], ptr %0, i64 0, i64 %45
  %47 = load float, ptr %46, align 4, !alias.scope !35475, !noalias !35478, !noundef !10
  %48 = fsub float %44, %47
  %49 = fmul float %48, %48
  %50 = fadd float %43, %49
  %51 = tail call float @llvm.sqrt.f32(float %50) #124
  %52 = getelementptr inbounds [36 x float], ptr %4, i64 0, i64 %26
  store float %51, ptr %52, align 4, !noalias !35473
  ; %53 is the write index after this store; it feeds %9 when the inner loop
  ; exits. The inner loop repeats while %28 < 7.
  %53 = add nuw nsw i64 %26, 1
  %54 = icmp ult i64 %28, 7
  br i1 %54, label %24, label %7

; Bounds-check failure for the store into %4: report index %26 against
; length 36; the call is followed by unreachable.
55:                                               ; preds = %24
  tail call void @_ZN4core9panicking18panic_bounds_check17h780c23a7ac717e73E(i64 noundef %26, i64 noundef 36, ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @anon.a7b0d70a988dcb77c7acaf2945af91dc.2) #125, !noalias !35473
  unreachable

; After the nested loops: copy the 144-byte buffer %4 into %6, end %4's
; lifetime, then call f_polynomials with %5 as the sret result buffer and %6
; as its readonly input.
56:                                               ; preds = %8
  call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(144) %6, ptr noundef nonnull align 4 dereferenceable(144) %4, i64 144, i1 false) #124, !noalias !35470
  call void @llvm.lifetime.end.p0(i64 144, ptr nonnull %4) #124, !noalias !35473
  call void @llvm.lifetime.start.p0(i64 832, ptr nonnull %5) #124
  call void @_ZN8rust_msa19polynomials_ethanol13f_polynomials17hc8d71dbfb7845995E(ptr noalias nocapture noundef nonnull sret([208 x float]) dereferenceable(832) %5, ptr noalias noundef nonnull readonly align 4 dereferenceable(144) %6) #124
  br label %58

; Exit block: end the lifetimes of %5 and %6 and return the accumulated sum.
57:                                               ; preds = %58
  call void @llvm.lifetime.end.p0(i64 832, ptr nonnull %5) #124
  call void @llvm.lifetime.end.p0(i64 144, ptr nonnull %6) #124
  ret float %67

; Reduction loop: %59 counts from 0, %60 is the running sum (starts at 0.0).
; Each iteration adds %2[%59] * %5[%59]; the loop exits once %61 == 208.
58:                                               ; preds = %58, %56
  %59 = phi i64 [ %61, %58 ], [ 0, %56 ]
  %60 = phi float [ 0.000000e+00, %56 ], [ %67, %58 ]
  %61 = add nuw nsw i64 %59, 1
  %62 = getelementptr inbounds [208 x float], ptr %2, i64 0, i64 %59
  %63 = load float, ptr %62, align 4, !noundef !10
  %64 = getelementptr inbounds [208 x float], ptr %5, i64 0, i64 %59
  %65 = load float, ptr %64, align 4, !noundef !10
  %66 = fmul float %63, %65
  %67 = fadd float %60, %66
  %68 = icmp eq i64 %61, 208
  br i1 %68, label %57, label %58
}

@ZuseZ4 ZuseZ4 closed this as completed Sep 18, 2023
ZuseZ4 pushed a commit that referenced this issue Sep 23, 2024
better implementation of signed div_floor/ceil

Tracking issue for signed `div_floor`/`div_ceil`: rust-lang#88581.

This PR improves the implementation of those two functions by adding a better branchless algorithm. Side-by-side comparison of `i32::div_floor` on x86-64:

```asm
div_floor_new:                               div_floor_old:
        push    rax                                  push    rax
        test    esi, esi                             test    esi, esi
        je      .LBB0_3                              je      .LBB1_6
        mov     eax, esi                             mov     eax, esi
        not     eax                                  not     eax
        lea     ecx, [rdi - 2147483648]              lea     ecx, [rdi - 2147483648]
        or      ecx, eax                             or      ecx, eax
        je      .LBB0_2                              je      .LBB1_7
        mov     eax, edi                             mov     eax, edi
        cdq                                          cdq
        idiv    esi                                  idiv    esi
        xor     esi, edi                             test    edx, edx
        sar     esi, 31                              setg    cl
        test    edx, edx                             test    esi, esi
        cmove   esi, edx                             sets    dil
        add     eax, esi                             test    dil, cl
        pop     rcx                                  jne     .LBB1_4
        ret                                          test    edx, edx
.LBB0_3:                                             setns   cl
        lea     rdi, [rip + .L__unnamed_1]           test    esi, esi
        call    qword ptr [rip + panic...]           setle   dl
.LBB0_2:                                             or      dl, cl
        lea     rdi, [rip + .L__unnamed_1]           jne     .LBB1_5
        call    qword ptr [rip + panic...]   .LBB1_4:
                                                     dec     eax
                                             .LBB1_5:
                                                     pop     rcx
                                                     ret
                                             .LBB1_6:
                                                     lea     rdi, [rip + .L__unnamed_2]
                                                     call    qword ptr [rip + panic...]
                                             .LBB1_7:
                                                     lea     rdi, [rip + .L__unnamed_2]
                                                     call    qword ptr [rip + panic...]
```

And on Aarch64:

```asm
_div_floor_new:                                   _div_floor_old:
        stp     x29, x30, [sp, #-16]!                     stp     x29, x30, [sp, #-16]!
        mov     x29, sp                                   mov     x29, sp
        cbz     w1, LBB0_4                                cbz     w1, LBB1_9
        mov     w8, #-2147483648                          mov     x8, x0
        cmp     w0, w8                                    mov     w9, #-2147483648
        b.ne    LBB0_3                                    cmp     w0, w9
        cmn     w1, #1                                    b.ne    LBB1_3
        b.eq    LBB0_5                                    cmn     w1, #1
LBB0_3:                                                   b.eq    LBB1_10
        sdiv    w8, w0, w1                        LBB1_3:
        msub    w9, w8, w1, w0                            sdiv    w0, w8, w1
        eor     w10, w1, w0                               msub    w8, w0, w1, w8
        asr     w10, w10, #31                             tbz     w1, #31, LBB1_5
        cmp     w9, #0                                    cmp     w8, #0
        csel    w9, wzr, w10, eq                          b.gt    LBB1_7
        add     w0, w9, w8                        LBB1_5:
        ldp     x29, x30, [sp], #16                       cmp     w1, #1
        ret                                               b.lt    LBB1_8
LBB0_4:                                                   tbz     w8, #31, LBB1_8
        adrp    x0, l___unnamed_1@PAGE            LBB1_7:
        add     x0, x0, l___unnamed_1@PAGEOFF             sub     w0, w0, #1
        bl      panic...                          LBB1_8:
LBB0_5:                                                   ldp     x29, x30, [sp], #16
        adrp    x0, l___unnamed_1@PAGE                    ret
        add     x0, x0, l___unnamed_1@PAGEOFF     LBB1_9:
        bl      panic...                                  adrp    x0, l___unnamed_2@PAGE
                                                          add     x0, x0, l___unnamed_2@PAGEOFF
                                                          bl      panic...
                                                  LBB1_10:
                                                          adrp    x0, l___unnamed_2@PAGE
                                                          add     x0, x0, l___unnamed_2@PAGEOFF
                                                          bl      panic...
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants