@@ -35,6 +35,10 @@ llvm::cl::opt<bool>
35
35
EfficientBoolCache (" enzyme-smallbool" , cl::init(false ), cl::Hidden,
36
36
cl::desc(" Place 8 bools together in a single byte" ));
37
37
38
// Hidden boolean command-line option, off by default (cl::init(false)).
// Per its description string, it requests zero-initialization of the cache.
+ llvm::cl::opt<bool > EnzymeZeroCache (" enzyme-zero-cache" , cl::init(false ),
39
+ cl::Hidden,
40
+ cl::desc(" Zero initialize the cache" ));
41
+
38
42
// Hidden boolean command-line option, off by default (cl::init(false)).
// Per its description string, it enables Enzyme to print performance info.
llvm::cl::opt<bool >
39
43
EnzymePrintPerf (" enzyme-print-perf" , cl::init(false ), cl::Hidden,
40
44
cl::desc(" Enable Enzyme to print performance info" ));
@@ -640,6 +644,9 @@ AllocaInst *CacheUtility::createCacheForScope(LimitContext ctx, Type *T,
640
644
#endif
641
645
}
642
646
}
647
// When EnzymeZeroCache is set and there are no sublimits, emit (via
// entryBuilder) a store of the null/zero constant of types.back() into
// `alloc`, and record that store in scopeInstructions[alloc].
// NOTE(review): presumably the empty-sublimits case means the cache slot is
// held directly in `alloc` rather than in a heap buffer — confirm against the
// rest of the function.
+ if (EnzymeZeroCache && sublimits.size () == 0 )
648
+ scopeInstructions[alloc].push_back (
649
+ entryBuilder.CreateStore (Constant::getNullValue (types.back ()), alloc));
643
650
644
651
Type *BPTy = Type::getInt8PtrTy (T->getContext ());
645
652
@@ -699,6 +706,20 @@ AllocaInst *CacheUtility::createCacheForScope(LimitContext ctx, Type *T,
699
706
cast<CallInst>(cast<Instruction>(firstallocation)->getOperand (0 ));
700
707
}
701
708
709
// When EnzymeZeroCache is set and i == 0, emit a call to the llvm.memset
// intrinsic right after the allocation and record it in
// scopeInstructions[alloc]. The intrinsic arguments are, in order:
//   args[0] destination  = the value returned by `malloccall`
//   args[1] fill byte    = i8 0
//   args[2] length       = the first argument operand of `malloccall`
//   args[3] is-volatile  = false
// `tys` supplies the overloaded types of the intrinsic (pointer type of the
// destination and integer type of the length).
// NOTE(review): `i == 0` presumably selects the innermost allocation level —
// confirm against the enclosing loop, which is not visible in this hunk.
+ if (EnzymeZeroCache && i == 0 ) {
710
+ Value *args[] = {
711
+ malloccall,
712
+ ConstantInt::get (Type::getInt8Ty (malloccall->getContext ()), 0 ),
713
+ malloccall->getArgOperand (0 ),
714
+ ConstantInt::getFalse (malloccall->getContext ())};
715
+ Type *tys[] = {args[0 ]->getType (), args[2 ]->getType ()};
716
+
717
+ scopeInstructions[alloc].push_back (allocationBuilder.CreateCall (
718
+ Intrinsic::getDeclaration (newFunc->getParent (), Intrinsic::memset ,
719
+ tys),
720
+ args));
721
+ }
722
+
702
723
// Assert computation of size of array doesn't wrap
703
724
if (auto BI = dyn_cast<BinaryOperator>(malloccall->getArgOperand (0 ))) {
704
725
if ((BI->getOperand (0 ) == byteSizeOfType &&
@@ -788,8 +809,9 @@ AllocaInst *CacheUtility::createCacheForScope(LimitContext ctx, Type *T,
788
809
Value *realloccall = nullptr ;
789
810
790
811
realloccall = build.CreateCall (
791
- getOrInsertExponentialAllocator (*newFunc->getParent ()), idxs,
792
- name + " _realloccache" );
812
+ getOrInsertExponentialAllocator (*newFunc->getParent (),
813
+ EnzymeZeroCache && i == 0 ),
814
+ idxs, name + " _realloccache" );
793
815
scopeAllocs[alloc].push_back (cast<CallInst>(realloccall));
794
816
allocation = build.CreateBitCast (realloccall, allocation->getType ());
795
817
storealloc = build.CreateStore (allocation, storeInto);
0 commit comments