Include scalar params in caching key #94

Merged · 2 commits · Aug 25, 2022
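Context: the MPS backend caches each compiled MPSGraph under a string key. Before this change, the softplus forward and backward keys were built from getTensorsStringKey alone, while the beta and threshold scalars were baked into the compiled graph. Two calls differing only in those scalars therefore mapped to the same key, and the second call reused a graph built with the first call's values. Below is a minimal Python sketch of the failure mode and the fix; the cache, the compile step, and the key strings are illustrative stand-ins, not the actual MPS machinery.

    from math import exp, log

    _graph_cache = {}

    def _compile_softplus(beta, threshold):
        # Stand-in for compiling an MPSGraph: the scalars are baked in at build time.
        def graph(x):
            return x if beta * x > threshold else log(1.0 + exp(beta * x)) / beta
        return graph

    def softplus(x, beta=1.0, threshold=20.0):
        # Buggy key (pre-PR): tensor info only, so different scalars collide:
        #   key = "softplus_out_mps:scalar_f32"
        # Fixed key, mirroring the Activation.mm change below:
        key = f"softplus_out_mps:scalar_f32:{beta:.6f}:{threshold:.6f}"
        if key not in _graph_cache:
            _graph_cache[key] = _compile_softplus(beta, threshold)
        return _graph_cache[key](x)

    print(softplus(2.0, beta=0.5))  # compiles and caches a beta=0.5 graph
    print(softplus(2.0, beta=4.0))  # distinct key, so a fresh beta=4.0 graph is built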
aten/src/ATen/native/mps/operations/Activation.mm (2 additions, 2 deletions)

@@ -1468,7 +1468,7 @@ Tensor glu_backward_mps (const Tensor& grad_output,
   MPSStream* stream = getCurrentMPSStream();

   @autoreleasepool {
-    string key = "softplus_out_mps:" + getTensorsStringKey({self});
+    string key = "softplus_out_mps:" + getTensorsStringKey({self}) + ":" + std::to_string(beta.to<double>()) + ":" + std::to_string(threshold.to<double>());

     CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
     if(!cachedGraph) {
@@ -1567,7 +1567,7 @@ Tensor glu_backward_mps (const Tensor& grad_output,
   MPSStream* stream = getCurrentMPSStream();

   @autoreleasepool {
-    string key = "softplus_backward_out_mps:" + getTensorsStringKey({grad_output, self});
+    string key = "softplus_backward_out_mps:" + getTensorsStringKey({grad_output, self}) + ":" + std::to_string(beta.to<double>()) + ":" + std::to_string(threshold.to<double>());

     CachedGraph* cachedGraph = static_cast<CachedGraph *>(cache_->LookUp(key));
     if(!cachedGraph) {
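With the change, the composed key embeds the scalar values; std::to_string on a double formats with six decimal places, so any two beta/threshold pairs that differ within the first six decimals get distinct keys. A rough illustration of the resulting key (the tensor portion emitted by getTensorsStringKey is a made-up stand-in):

    beta, threshold = 0.5, 20.0
    tensor_part = "[2,3]:f32"  # illustrative stand-in for getTensorsStringKey({self})
    key = f"softplus_out_mps:{tensor_part}:{beta:.6f}:{threshold:.6f}"
    print(key)  # softplus_out_mps:[2,3]:f32:0.500000:20.000000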
test/test_mps.py (6 additions, 4 deletions)

@@ -3788,12 +3788,12 @@ def helper(shape, dim=0):

     # Test softplus
     def test_softplus(self):
-        def helper(shape):
+        def helper(shape, beta=1, threshold=20):
             cpu_x = torch.randn(shape, device='cpu', dtype=torch.float, requires_grad=True)
             x = cpu_x.detach().clone().to('mps').requires_grad_()

-            softplus_result = torch.nn.Softplus(beta=0.5, threshold=0.5)(x)
-            softplus_result_cpu = torch.nn.Softplus(beta=0.5, threshold=0.5)(cpu_x)
+            softplus_result = torch.nn.Softplus(beta=beta, threshold=threshold)(x)
+            softplus_result_cpu = torch.nn.Softplus(beta=beta, threshold=threshold)(cpu_x)

             cpu_grad = torch.randn(softplus_result.shape)
             grad = cpu_grad.to('mps')
@@ -3806,7 +3806,9 @@ def helper(shape):

         # Test empty shape too
         for shape in [(), (2, 3), (10, 10), (2, 3, 4, 5)]:
-            helper(shape)
+            for beta in [0.5, 1, 2, 3, 4]:
+                for threshold in [0.5, 20, 30, 40, 50]:
+                    helper(shape, beta, threshold)

         # Test silu
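The widened test matrix runs every shape against several beta/threshold combinations, so the forward and backward cache keys are exercised across scalar values rather than a single hard-coded pair. A hypothetical quick check of the original symptom, assuming a PyTorch build with MPS support:

    import torch

    x = torch.randn(2, 3, device="mps")
    a = torch.nn.functional.softplus(x, beta=1.0, threshold=20.0)
    b = torch.nn.functional.softplus(x, beta=4.0, threshold=20.0)
    # Before the fix, the second call could hit the cached beta=1.0 graph and
    # return the same (wrong) values; with the fix the results differ.
    print(torch.allclose(a, b))  # expected: False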