From 0e6bc32140f52434a72d1958096fbfd0aba194c3 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 17:05:55 -0400 Subject: [PATCH 01/16] Implemented threshold I implemented the threshold activation function as well as unit tests to verify its functionality. --- basalt/autograd/ops/mlops.mojo | 260 +++++++++++++++++++++++-------- basalt/autograd/ops/ops.mojo | 95 ++++++++--- basalt/nn/__init__.mojo | 2 +- basalt/nn/activations.mojo | 14 ++ tests/mojo/test_activations.mojo | 61 +++++++- 5 files changed, 337 insertions(+), 95 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 08699199..4286420c 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -7,6 +7,64 @@ from basalt.utils.tensorutils import elwise_transform from basalt.autograd.attributes import Attribute, AttributeVector +struct THRESHOLD: + @staticmethod + fn result_shape(t1_shape: TensorShape) -> TensorShape: + return t1_shape + + @staticmethod + fn forward[ + t1_shape: TensorShape, + attributes: AttributeVector, + ](inout res: Tensor[dtype], t1: Tensor[dtype]): + """Forward pass for threshold.""" + + alias THRESHOLD: Scalar[dtype] = attributes[ + "threshold" + ].value().to_scalar[dtype]() + + alias VALUE: Scalar[dtype] = attributes["value"].value().to_scalar[ + dtype + ]() + + @always_inline + fn threshold[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + return (x > x.splat(THRESHOLD.cast[type]())).select[type]( + x, VALUE.cast[type]() + ) # Feels like using AttributeVector made this unnecessarily complicated + + elwise_transform[threshold](res, t1) + + @staticmethod + fn backward[ + ug_shape: TensorShape, + t1_shape: TensorShape, + attributes: AttributeVector, + ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: + """Backward pass for threshold.""" + alias THRESHOLD: Scalar[dtype] = attributes[ + "threshold" + ].value().to_scalar[dtype]() + + alias VALUE: Scalar[dtype] = attributes["value"].value().to_scalar[ + dtype + ]() + + @always_inline + fn threshold[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + return (x > x.splat(THRESHOLD.cast[type]())).select[type](1, 0) + + var res_grad = Tensor[dtype](t1_shape) + + elwise_transform[threshold](res_grad, t1) + + return res_grad^ + + @value struct SIGMOID: @staticmethod @@ -52,7 +110,7 @@ struct SIGMOID: vectorize[vec_sigmoid_bw, nelts](ug_shape.num_elements()) - return res_grad ^ + return res_grad^ struct RELU: @@ -100,7 +158,7 @@ struct RELU: vectorize[vec_relu_bw, nelts](ug_shape.num_elements()) - return res_grad ^ + return res_grad^ struct TANH: @@ -146,7 +204,7 @@ struct TANH: vectorize[vec_tanh_bw, nelts](ug_shape.num_elements()) - return res_grad ^ + return res_grad^ struct CLIP: @@ -164,12 +222,12 @@ struct CLIP: alias min_attr = attributes["min"] alias max_attr = attributes["max"] - var min_val = min_attr.value().to_scalar[dtype]() if min_attr else min_finite[ + var min_val = min_attr.value().to_scalar[ dtype - ]() - var max_val = max_attr.value().to_scalar[dtype]() if max_attr else max_finite[ + ]() if min_attr else min_finite[dtype]() + var max_val = max_attr.value().to_scalar[ dtype - ]() + ]() if max_attr else max_finite[dtype]() @parameter fn vec_clip[nelts: Int](i: Int): @@ -187,12 +245,12 @@ struct CLIP: alias min_attr = attributes["min"] alias max_attr = attributes["max"] - var min_val = min_attr.value().to_scalar[dtype]() if 
min_attr else min_finite[ + var min_val = min_attr.value().to_scalar[ dtype - ]() - var max_val = max_attr.value().to_scalar[dtype]() if max_attr else max_finite[ + ]() if min_attr else min_finite[dtype]() + var max_val = max_attr.value().to_scalar[ dtype - ]() + ]() if max_attr else max_finite[dtype]() var res_grad = Tensor[dtype](t_shape) @@ -201,17 +259,21 @@ struct CLIP: var val = t.load[nelts](i) res_grad.store[nelts]( i, - ((val >= min_val) * (val <= max_val)).select(ug.load[nelts](i), 0), + ((val >= min_val) * (val <= max_val)).select( + ug.load[nelts](i), 0 + ), ) vectorize[vec_clip_bw, nelts, size = t_shape.num_elements()]() - return res_grad ^ + return res_grad^ struct SQUEEZE: @staticmethod - fn result_shape(t1_shape: TensorShape, attributes: AttributeVector) -> TensorShape: + fn result_shape( + t1_shape: TensorShape, attributes: AttributeVector + ) -> TensorShape: var dim = attributes["dims"] var dims_to_squeeze = dim.value().to_shape() if dim else TensorShape() @@ -239,12 +301,14 @@ struct SQUEEZE: ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: var res_grad = Tensor[dtype](t1_shape) memcpy(res_grad.data(), ug.data(), ug.num_elements()) - return res_grad ^ + return res_grad^ struct UNSQUEEZE: @staticmethod - fn result_shape(t1_shape: TensorShape, attributes: AttributeVector) -> TensorShape: + fn result_shape( + t1_shape: TensorShape, attributes: AttributeVector + ) -> TensorShape: var dim = attributes["dims"] var dims_to_squeeze = dim.value().to_shape() if dim else TensorShape() @@ -276,7 +340,7 @@ struct UNSQUEEZE: ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: var res_grad = Tensor[dtype](t1_shape) memcpy(res_grad.data(), ug.data(), ug.num_elements()) - return res_grad ^ + return res_grad^ struct SLICE: @@ -285,7 +349,7 @@ struct SLICE: # Adjust negative indices & ensure they are within bounds. 
var s = slice if slice >= 0 else dim_size + slice return max(min(s, dim_size), 0) - + @staticmethod fn default_starts(shape: TensorShape) -> List[Int]: var starts = List[Int]() @@ -306,7 +370,7 @@ struct SLICE: for i in range(shape.rank()): steps.append(1) return steps^ - + @staticmethod fn default_axes(shape: TensorShape) -> List[Int]: # NOTE: axes can't be negative @@ -316,38 +380,55 @@ struct SLICE: return axes^ @staticmethod - fn result_shape(t1_shape: TensorShape, attributes: AttributeVector) -> TensorShape: + fn result_shape( + t1_shape: TensorShape, attributes: AttributeVector + ) -> TensorShape: # NOTE: Starts and ends have to be of the same size # NOTE: If axes not provided, starts and ends have to be of the same size as t1_shape var starts = attributes["starts"].value().to_shape() var ends = attributes["ends"].value().to_shape() - var steps = attributes["steps"].value().to_shape() if attributes["steps"] else Self.default_steps(starts) - var axes = attributes["axes"].value().to_shape() if attributes["axes"] else Self.default_axes(t1_shape) + var steps = attributes["steps"].value().to_shape() if attributes[ + "steps" + ] else Self.default_steps(starts) + var axes = attributes["axes"].value().to_shape() if attributes[ + "axes" + ] else Self.default_axes(t1_shape) var new_shape = t1_shape for i in range(starts.rank()): var axis = axes[i] - new_shape[axis] = len(range( - start = Self.adjust_boundary(starts[i], t1_shape[axis]), - end = Self.adjust_boundary(ends[i], t1_shape[axis]), - step = steps[i] - )) + new_shape[axis] = len( + range( + start=Self.adjust_boundary(starts[i], t1_shape[axis]), + end=Self.adjust_boundary(ends[i], t1_shape[axis]), + step=steps[i], + ) + ) return new_shape @staticmethod - fn reorder_positions[id: Int](original: TensorShape, axes: TensorShape, t1_shape: TensorShape) -> List[Int]: + fn reorder_positions[ + id: Int + ](original: TensorShape, axes: TensorShape, t1_shape: TensorShape) -> List[ + Int + ]: # Reorder the starts (id=0), ends (id=1) or steps (id=2) to match the order of the axes var updated: List[Int] @parameter - if id == 0: updated = Self.default_starts(t1_shape) - elif id == 1: updated = Self.default_ends(t1_shape) - else: updated = Self.default_steps(t1_shape) - + if id == 0: + updated = Self.default_starts(t1_shape) + elif id == 1: + updated = Self.default_ends(t1_shape) + else: + updated = Self.default_steps(t1_shape) + for i in range(axes.rank()): var axis = axes[i] - updated[axis] = original[i] if id == 2 else Self.adjust_boundary(original[i], t1_shape[axis]) + updated[axis] = original[i] if id == 2 else Self.adjust_boundary( + original[i], t1_shape[axis] + ) return updated^ @@ -360,12 +441,12 @@ struct SLICE: steps: List[Int], starts: List[Int], ends: List[Int], - backward_op: Bool = False + backward_op: Bool = False, ]( inout res: Tensor[dtype], t1: Tensor[dtype], last_dims: Int, - position: Int, + position: Int, last_position: Int, idx: Int, idx_original: Int, @@ -374,7 +455,9 @@ struct SLICE: alias t1_strides = original_shape.strides() var idx_temp = idx - var idx_original_temp = starts[position] * t1_strides[position] + idx_original + var idx_original_temp = starts[position] * t1_strides[ + position + ] + idx_original if position == last_position + 1: # Work on the last dimensions @@ -382,37 +465,51 @@ struct SLICE: alias stride = t1_strides[position] * steps[position] @parameter - fn v_slice[nelts: Int](k : Int): - + fn v_slice[nelts: Int](k: Int): @parameter if not backward_op: + @parameter if steps[position] == 1: - 
res.store[nelts](idx_temp + k, t1.load[nelts](idx_original_temp)) + res.store[nelts]( + idx_temp + k, t1.load[nelts](idx_original_temp) + ) else: res.store[nelts]( idx_temp + k, - t1.data().offset(idx_original_temp).simd_strided_load[nelts](stride) + t1.data() + .offset(idx_original_temp) + .simd_strided_load[nelts](stride), ) else: + @parameter if steps[position] == 1: - res.store[nelts](idx_original_temp, t1.load[nelts](idx_temp + k)) - else: - res.data().offset(idx_original_temp).simd_strided_store[nelts]( - t1.load[nelts](idx_temp + k), - stride + res.store[nelts]( + idx_original_temp, t1.load[nelts](idx_temp + k) ) - + else: + res.data().offset(idx_original_temp).simd_strided_store[ + nelts + ](t1.load[nelts](idx_temp + k), stride) + idx_original_temp += stride * nelts vectorize[v_slice, nelts](last_dims) - return + return for _ in range(shape[position]): - Self.recursive_iters_slice[shape, original_shape, steps, starts, ends, backward_op]( - res, t1, last_dims, position + 1, last_position, idx_temp, idx_original_temp + Self.recursive_iters_slice[ + shape, original_shape, steps, starts, ends, backward_op + ]( + res, + t1, + last_dims, + position + 1, + last_position, + idx_temp, + idx_original_temp, ) idx_temp += strides[position] @@ -425,10 +522,10 @@ struct SLICE: steps: List[Int], starts: List[Int], ends: List[Int], - backward_op: Bool = False + backward_op: Bool = False, ](inout res: Tensor[dtype], t1: Tensor[dtype]): alias strides = original_shape.strides() - + # Get the dimensions for vectorization var last_dims = 1 var positions_to_skip = 0 @@ -439,7 +536,7 @@ struct SLICE: positions_to_skip += 1 if starts[i] != 0 or ends[i] != original_shape[i] or steps[i] != 1: break - + # Get the dimensions for the first loop var first_dims = 1 var start_position = 0 @@ -450,31 +547,46 @@ struct SLICE: start_position += 1 var middle_dims = res_shape.num_elements() // last_dims // first_dims - + @parameter fn p_slice(i: Int): Self.recursive_iters_slice[ res_shape, original_shape, steps, starts, ends, backward_op ]( - res, t1, last_dims, start_position, res_shape.rank() - 1 - positions_to_skip, - i * middle_dims * last_dims, i * strides[start_position - 1] + res, + t1, + last_dims, + start_position, + res_shape.rank() - 1 - positions_to_skip, + i * middle_dims * last_dims, + i * strides[start_position - 1], ) parallelize[p_slice](first_dims) - + @staticmethod fn forward[ t1_shape: TensorShape, attributes: AttributeVector, ](inout res: Tensor[dtype], t1: Tensor[dtype]): - alias axes = attributes["axes"].value().to_shape() if attributes["axes"] else Self.default_axes(t1_shape) - alias starts = Self.reorder_positions[0](attributes["starts"].value().to_shape(), axes, t1_shape) - alias ends = Self.reorder_positions[1](attributes["ends"].value().to_shape(), axes, t1_shape) - alias steps = Self.reorder_positions[2](attributes["steps"].value().to_shape(), axes, t1_shape) if attributes["steps"] else Self.default_steps(t1_shape) + alias axes = attributes["axes"].value().to_shape() if attributes[ + "axes" + ] else Self.default_axes(t1_shape) + alias starts = Self.reorder_positions[0]( + attributes["starts"].value().to_shape(), axes, t1_shape + ) + alias ends = Self.reorder_positions[1]( + attributes["ends"].value().to_shape(), axes, t1_shape + ) + alias steps = Self.reorder_positions[2]( + attributes["steps"].value().to_shape(), axes, t1_shape + ) if attributes["steps"] else Self.default_steps(t1_shape) alias res_shape = Self.result_shape(t1_shape, attributes) - Self.slice_kernel[res_shape, t1_shape, 
steps, starts, ends, False](res, t1) + Self.slice_kernel[res_shape, t1_shape, steps, starts, ends, False]( + res, t1 + ) @staticmethod fn backward[ @@ -482,13 +594,23 @@ struct SLICE: t1_shape: TensorShape, attributes: AttributeVector = AttributeVector(), ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: - alias axes = attributes["axes"].value().to_shape() if attributes["axes"] else Self.default_axes(t1_shape) - alias starts = Self.reorder_positions[0](attributes["starts"].value().to_shape(), axes, t1_shape) - alias ends = Self.reorder_positions[1](attributes["ends"].value().to_shape(), axes, t1_shape) - alias steps = Self.reorder_positions[2](attributes["steps"].value().to_shape(), axes, t1_shape) if attributes["steps"] else Self.default_steps(t1_shape) + alias axes = attributes["axes"].value().to_shape() if attributes[ + "axes" + ] else Self.default_axes(t1_shape) + alias starts = Self.reorder_positions[0]( + attributes["starts"].value().to_shape(), axes, t1_shape + ) + alias ends = Self.reorder_positions[1]( + attributes["ends"].value().to_shape(), axes, t1_shape + ) + alias steps = Self.reorder_positions[2]( + attributes["steps"].value().to_shape(), axes, t1_shape + ) if attributes["steps"] else Self.default_steps(t1_shape) var res_grad = Tensor[dtype](t1_shape) - - Self.slice_kernel[ug_shape, t1_shape, steps, starts, ends, True](res_grad, ug) - - return res_grad ^ \ No newline at end of file + + Self.slice_kernel[ug_shape, t1_shape, steps, starts, ends, True]( + res_grad, ug + ) + + return res_grad^ diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index 71982706..49b87bc8 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -15,7 +15,16 @@ from .basics import ( TRANSPOSE, FMA, ) -from .mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE, SLICE +from .mlops import ( + THRESHOLD, + SIGMOID, + RELU, + TANH, + CLIP, + SQUEEZE, + UNSQUEEZE, + SLICE, +) from .dynamics import CONCAT, SPLIT from .conv import CONV2D from .pool import MAXPOOL2D @@ -61,6 +70,7 @@ struct OP(Stringable): alias CONCAT = OP(23, "CONCAT", dynamic=True) alias SPLIT = OP(24, "SPLIT", dynamic=True) alias SLICE = OP(25, "SLICE") + alias THRESHOLD = OP(26, "THRESHOLD") var id: UInt8 var name: Bytes[16] @@ -87,10 +97,16 @@ fn static_result_shape( if len(operands) == 1: return static_result_shape(op, operands[0].shape, attributes) elif len(operands) == 2: - return static_result_shape(op, operands[0].shape, operands[1].shape, attributes) + return static_result_shape( + op, operands[0].shape, operands[1].shape, attributes + ) elif len(operands) == 3: return static_result_shape( - op, operands[0].shape, operands[1].shape, operands[2].shape, attributes + op, + operands[0].shape, + operands[1].shape, + operands[2].shape, + attributes, ) else: print("Error: Invalid number of operands") @@ -117,6 +133,8 @@ fn static_result_shape( return FLATTEN.result_shape(t1_shape) elif op == OP.RESHAPE: return RESHAPE.result_shape(t1_shape, attributes) + elif op == OP.THRESHOLD: + return THRESHOLD.result_shape(t1_shape) elif op == OP.SIGMOID: return SIGMOID.result_shape(t1_shape) elif op == OP.RELU: @@ -231,6 +249,8 @@ fn forward_op[ FLATTEN.forward[t1_shape](res, t1) elif op == OP.RESHAPE: RESHAPE.forward[t1_shape](res, t1) + elif op == OP.THRESHOLD: + THRESHOLD.forward[t1_shape, attributes](res, t1) elif op == OP.SIGMOID: SIGMOID.forward[t1_shape](res, t1) elif op == OP.RELU: @@ -254,7 +274,10 @@ fn forward_op[ fn forward_op[ - op: OP, t1_shape: TensorShape, t2_shape: 
TensorShape, attributes: AttributeVector + op: OP, + t1_shape: TensorShape, + t2_shape: TensorShape, + attributes: AttributeVector, ](inout res: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype]): """ Forward pass for binary operators. @@ -283,14 +306,21 @@ fn forward_op[ t2_shape: TensorShape, t3_shape: TensorShape, attributes: AttributeVector, -](inout res: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype], t3: Tensor[dtype]): +]( + inout res: Tensor[dtype], + t1: Tensor[dtype], + t2: Tensor[dtype], + t3: Tensor[dtype], +): """ Forward pass for ternary operators. """ @parameter if op == OP.CONV2D: - CONV2D.forward[t1_shape, t2_shape, t3_shape, attributes](res, t1, t2, t3) + CONV2D.forward[t1_shape, t2_shape, t3_shape, attributes]( + res, t1, t2, t3 + ) elif op == OP.FMA: FMA.forward[t1_shape, t2_shape, t3_shape](res, t1, t2, t3) else: @@ -300,11 +330,7 @@ fn forward_op[ fn forward_op[ op: OP, attributes: AttributeVector, -]( - inputs: List[Symbol], - outputs: List[Symbol], - parameters: Parameters, -): +](inputs: List[Symbol], outputs: List[Symbol], parameters: Parameters,): """ Forward pass for dynamic operators. """ @@ -343,6 +369,8 @@ fn backward_op[ res_grad = FLATTEN.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RESHAPE: res_grad = RESHAPE.backward[ug_shape, t1_shape](ug, t1) + elif op == OP.THRESHOLD: + res_grad = THRESHOLD.backward[ug_shape, t1_shape](ug, t1) elif op == OP.SIGMOID: res_grad = SIGMOID.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RELU: @@ -375,7 +403,12 @@ fn backward_op[ t1_shape: TensorShape, t2_shape: TensorShape, attributes: AttributeVector, -](ug: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype], inout grad: Tensor[dtype]): +]( + ug: Tensor[dtype], + t1: Tensor[dtype], + t2: Tensor[dtype], + inout grad: Tensor[dtype], +): """ Backward pass for binary operators. 
""" @@ -383,17 +416,29 @@ fn backward_op[ @parameter if op == OP.ADD: - res_grad = ADD.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = ADD.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.SUB: - res_grad = SUB.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = SUB.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.MUL: - res_grad = MUL.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = MUL.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.DIV: - res_grad = DIV.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = DIV.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.POW: - res_grad = POW.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = POW.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.DOT: - res_grad = DOT.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = DOT.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) else: print("[ERROR] Operator not found.") res_grad = Tensor[dtype](-1, -1) @@ -437,9 +482,9 @@ fn backward_op[ tensor_id, ug_shape, t1_shape, t2_shape, t3_shape, attributes ](ug, t1, t2, t3) elif op == OP.FMA: - res_grad = FMA.backward[tensor_id, ug_shape, t1_shape, t2_shape, t3_shape]( - ug, t1, t2, t3 - ) + res_grad = FMA.backward[ + tensor_id, ug_shape, t1_shape, t2_shape, t3_shape + ](ug, t1, t2, t3) else: print("[ERROR] Operator not found.") res_grad = Tensor[dtype](-1, -1) @@ -463,9 +508,13 @@ fn backward_op[ var res_grad: Tensor[dtype] if op == OP.CONCAT: - res_grad = CONCAT.backward[input_id, attributes](inputs, outputs, parameters) + res_grad = CONCAT.backward[input_id, attributes]( + inputs, outputs, parameters + ) elif op == OP.SPLIT: - res_grad = SPLIT.backward[input_id, attributes](inputs, outputs, parameters) + res_grad = SPLIT.backward[input_id, attributes]( + inputs, outputs, parameters + ) else: print("[ERROR] Operator not found.") res_grad = Tensor[dtype](-1, -1) diff --git a/basalt/nn/__init__.mojo b/basalt/nn/__init__.mojo index 99b30a31..3ff64121 100644 --- a/basalt/nn/__init__.mojo +++ b/basalt/nn/__init__.mojo @@ -6,4 +6,4 @@ from .layers.conv import Conv2d from .layers.pool import MaxPool2d from .loss import MSELoss, CrossEntropyLoss -from .activations import Softmax, LogSoftmax, ReLU, Sigmoid, Tanh +from .activations import Softmax, LogSoftmax, ReLU, Sigmoid, Tanh, Threshold diff --git a/basalt/nn/activations.mojo b/basalt/nn/activations.mojo index 2264a541..44ecaa1d 100644 --- a/basalt/nn/activations.mojo +++ b/basalt/nn/activations.mojo @@ -2,7 +2,21 @@ from basalt import Tensor, TensorShape from basalt import Graph, Symbol, OP from basalt.autograd.attributes import Attribute, AttributeVector + # '''Activation functions.''' +fn Threshold( + inout g: Graph, + input: Symbol, + threshold: Scalar[dtype], + value: Scalar[dtype], +) -> Symbol: + return g.op( + OP.THRESHOLD, + input, + attributes=AttributeVector( + Attribute("threshold", threshold), Attribute("value", value) + ), + ) fn ReLU(inout g: Graph, input: Symbol) -> Symbol: diff --git a/tests/mojo/test_activations.mojo b/tests/mojo/test_activations.mojo index 200215d9..bb6e92fe 100644 --- a/tests/mojo/test_activations.mojo +++ b/tests/mojo/test_activations.mojo @@ -10,6 +10,7 @@ from basalt.nn import ( ReLU, Sigmoid, Tanh, + Threshold, ) from basalt.autograd import Graph, Symbol from 
basalt.utils.tensorutils import fill @@ -19,6 +20,12 @@ from tests import assert_tensors_equal alias Activation = fn (inout g: Graph, input: Symbol) -> Symbol alias AxisActivation = fn (inout g: Graph, input: Symbol, axis: Int) -> Symbol +alias ThresholdActivation = fn ( + inout g: Graph, + input: Symbol, + threshold: Scalar[dtype], + value: Scalar[dtype], +) -> Symbol fn create_graph[ @@ -30,7 +37,7 @@ fn create_graph[ var x = g.input(shape) var activation = func(g, x, axis) g.out(activation) - return g ^ + return g^ fn create_graph[shape: TensorShape, func: Activation]() -> Graph: @@ -38,7 +45,36 @@ fn create_graph[shape: TensorShape, func: Activation]() -> Graph: var x = g.input(shape) var activation = func(g, x) g.out(activation) - return g ^ + return g^ + + +fn create_graph[ + shape: TensorShape, + func: ThresholdActivation, + threshold: Scalar[dtype], + value: Scalar[dtype], +]() -> Graph: + var g = Graph() + var x = g.input(shape) + var activation = func(g, x, threshold, value) + g.out(activation) + return g^ + + +fn test_graph[ + shape: TensorShape, + func: ThresholdActivation, + nodes: Int, + threshold: Scalar[dtype], + value: Scalar[dtype], +](input: Tensor[dtype], expected: Tensor[dtype]) raises: + alias graph = create_graph[shape, func, threshold, value]() + + var model = Model[graph](inference_only=True) + var res = model.inference(input)[0] + + assert_tensors_equal["almost"](res, expected) + assert_equal(len(graph.nodes), nodes) fn test_graph[ @@ -56,6 +92,7 @@ fn test_graph[ assert_equal(len(graph.nodes), nodes) +# TODO: All these overloads feel redundant. Find a way to condense them fn test_graph[ shape: TensorShape, func: Activation, @@ -70,6 +107,26 @@ fn test_graph[ assert_equal(len(graph.nodes), nodes, "Node count failed") +fn test_THRESHOLD() raises: + alias shape = TensorShape(2, 3) + alias nodes = 1 + + alias THRESHOLD = 3 + alias VALUE = 2 + + var input = Tensor[dtype](shape) + + for i in range(6): + input[i] = i + + var expected = Tensor[dtype](shape) + + for i in range(6): + expected[i] = i if i > THRESHOLD else VALUE + + test_graph[shape, Threshold, nodes, THRESHOLD, VALUE](input, expected) + + fn test_SOFTMAX() raises: alias shape = TensorShape(2, 3, 2) alias nodes = 5 From 458f1d40f17df619f45121aa7f8a4a4a2e650a60 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 19:40:03 -0400 Subject: [PATCH 02/16] Reduced cast operations for threshold --- basalt/autograd/ops/mlops.mojo | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 4286420c..8792587c 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -31,9 +31,10 @@ struct THRESHOLD: fn threshold[ type: DType, simd_width: Int ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: - return (x > x.splat(THRESHOLD.cast[type]())).select[type]( - x, VALUE.cast[type]() - ) # Feels like using AttributeVector made this unnecessarily complicated + alias casted_threshold = THRESHOLD.cast[type]() + alias casted_value = VALUE.cast[type]() + + return (x > x.splat(casted_threshold)).select[type](x, casted_value) elwise_transform[threshold](res, t1) From 3ec559f7d728854515c4d4d35e3b817368cd55bc Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 19:42:46 -0400 Subject: [PATCH 03/16] Fixed naming convention internally in threshold --- basalt/autograd/ops/mlops.mojo 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 8792587c..9be839ec 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -54,14 +54,14 @@ struct THRESHOLD: ]() @always_inline - fn threshold[ + fn threshold_bw[ type: DType, simd_width: Int ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: return (x > x.splat(THRESHOLD.cast[type]())).select[type](1, 0) var res_grad = Tensor[dtype](t1_shape) - elwise_transform[threshold](res_grad, t1) + elwise_transform[threshold_bw](res_grad, t1) return res_grad^ From 5d97da5bc26882798f3fe9e4df9c7810e8297a1e Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 19:47:03 -0400 Subject: [PATCH 04/16] Fixed bug with backward for threshold --- basalt/autograd/ops/ops.mojo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index 49b87bc8..48364b2f 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -370,7 +370,7 @@ fn backward_op[ elif op == OP.RESHAPE: res_grad = RESHAPE.backward[ug_shape, t1_shape](ug, t1) elif op == OP.THRESHOLD: - res_grad = THRESHOLD.backward[ug_shape, t1_shape](ug, t1) + res_grad = THRESHOLD.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.SIGMOID: res_grad = SIGMOID.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RELU: From dcacce1c63e231426646343af304383c6370771a Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 20:18:26 -0400 Subject: [PATCH 05/16] Implemented Hardtanh --- basalt/autograd/ops/ops.mojo | 8 ++++++++ basalt/nn/__init__.mojo | 10 +++++++++- basalt/nn/activations.mojo | 15 +++++++++++++++ tests/mojo/test_activations.mojo | 31 +++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index 48364b2f..c0d0db1c 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -20,6 +20,7 @@ from .mlops import ( SIGMOID, RELU, TANH, + HARDTANH, CLIP, SQUEEZE, UNSQUEEZE, @@ -71,6 +72,7 @@ struct OP(Stringable): alias SPLIT = OP(24, "SPLIT", dynamic=True) alias SLICE = OP(25, "SLICE") alias THRESHOLD = OP(26, "THRESHOLD") + alias HARDTANH = OP(27, "HARDTANH") var id: UInt8 var name: Bytes[16] @@ -141,6 +143,8 @@ fn static_result_shape( return RELU.result_shape(t1_shape) elif op == OP.TANH: return TANH.result_shape(t1_shape) + elif op == OP.HARDTANH: + return HARDTANH.result_shape(t1_shape) elif op == OP.TRANSPOSE: return TRANSPOSE.result_shape(t1_shape, attributes) elif op == OP.MAXPOOL2D: @@ -257,6 +261,8 @@ fn forward_op[ RELU.forward[t1_shape](res, t1) elif op == OP.TANH: TANH.forward[t1_shape](res, t1) + elif op == OP.HARDTANH: + HARDTANH.forward[t1_shape, attributes](res, t1) elif op == OP.TRANSPOSE: TRANSPOSE.forward[t1_shape, attributes](res, t1) elif op == OP.MAXPOOL2D: @@ -377,6 +383,8 @@ fn backward_op[ res_grad = RELU.backward[ug_shape, t1_shape](ug, t1) elif op == OP.TANH: res_grad = TANH.backward[ug_shape, t1_shape](ug, t1) + elif op == OP.HARDTANH: + res_grad = HARDTANH.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.TRANSPOSE: res_grad = TRANSPOSE.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.MAXPOOL2D: diff --git a/basalt/nn/__init__.mojo b/basalt/nn/__init__.mojo index 
3ff64121..855ac8c8 100644 --- a/basalt/nn/__init__.mojo +++ b/basalt/nn/__init__.mojo @@ -6,4 +6,12 @@ from .layers.conv import Conv2d from .layers.pool import MaxPool2d from .loss import MSELoss, CrossEntropyLoss -from .activations import Softmax, LogSoftmax, ReLU, Sigmoid, Tanh, Threshold +from .activations import ( + Softmax, + LogSoftmax, + ReLU, + Sigmoid, + Tanh, + Hardtanh, + Threshold, +) diff --git a/basalt/nn/activations.mojo b/basalt/nn/activations.mojo index 44ecaa1d..b0421212 100644 --- a/basalt/nn/activations.mojo +++ b/basalt/nn/activations.mojo @@ -31,6 +31,21 @@ fn Tanh(inout g: Graph, input: Symbol) -> Symbol: return g.op(OP.TANH, input) +fn Hardtanh( + inout g: Graph, + input: Symbol, + min_val: Scalar[dtype], + max_val: Scalar[dtype], +) -> Symbol: + return g.op( + OP.HARDTANH, + input, + attributes=AttributeVector( + Attribute("min_val", min_val), Attribute("max_val", max_val) + ), + ) + + fn Softmax(inout g: Graph, input: Symbol, axis: Int) -> Symbol: # softmax: exp(x_i) / sum(exp(x_j)) # stable softmax: exp(x_i - max(x_j)) / sum(exp(x_j - max(x_j))) diff --git a/tests/mojo/test_activations.mojo b/tests/mojo/test_activations.mojo index bb6e92fe..88330b16 100644 --- a/tests/mojo/test_activations.mojo +++ b/tests/mojo/test_activations.mojo @@ -10,6 +10,7 @@ from basalt.nn import ( ReLU, Sigmoid, Tanh, + Hardtanh, Threshold, ) from basalt.autograd import Graph, Symbol @@ -208,13 +209,43 @@ fn test_TANH() raises: test_graph[shape, Tanh, nodes](input, expected) +fn test_HARDTANH() raises: + alias shape = TensorShape(3, 3) + alias nodes = 1 + + alias MIN_VAL = -2 + alias MAX_VAL = 2 + + var input = Tensor[dtype](shape) + + for i in range(9): + input[i] = i - 4 + + var expected = Tensor[dtype](shape) + + for j in range(0, 9): + var i = j - 4 + if i < MIN_VAL: + expected[j] = MIN_VAL + + elif i > MAX_VAL: + expected[j] = MAX_VAL + + else: + expected[j] = i + + test_graph[shape, Hardtanh, nodes, MIN_VAL, MAX_VAL](input, expected) + + fn main(): try: + test_THRESHOLD() test_SOFTMAX() test_LOGSOFTMAX() test_RELU() test_SIGMOID() test_TANH() + test_HARDTANH() except e: print("[ERROR] Error in activations") print(e) From 75678224d302e9fe4ed2c098e52ed5a086ae8ea2 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 20:18:53 -0400 Subject: [PATCH 06/16] Rest of Hardtanh implementation --- basalt/autograd/ops/mlops.mojo | 66 ++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 9be839ec..645f3c2c 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -208,6 +208,72 @@ struct TANH: return res_grad^ +struct HARDTANH: + @staticmethod + fn result_shape(t1_shape: TensorShape) -> TensorShape: + return t1_shape + + @staticmethod + fn forward[ + t1_shape: TensorShape, + attributes: AttributeVector, + ](inout res: Tensor[dtype], t1: Tensor[dtype]): + """Forward pass for hard tanh.""" + + alias MIN_VAL: Scalar[dtype] = attributes["min_val"].value().to_scalar[ + dtype + ]() + + alias MAX_VAL: Scalar[dtype] = attributes["max_val"].value().to_scalar[ + dtype + ]() + + @always_inline + fn hardtanh[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + alias casted_min = MIN_VAL.cast[type]() + alias casted_max = MAX_VAL.cast[type]() + + var x_or_min = (x > x.splat(casted_min)).select[type](x, casted_min) + + return (x_or_min < 
x_or_min.splat(casted_max)).select[type]( + x_or_min, casted_max + ) + + elwise_transform[hardtanh](res, t1) + + @staticmethod + fn backward[ + ug_shape: TensorShape, + t1_shape: TensorShape, + attributes: AttributeVector, + ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: + """Backward pass for hard tanh.""" + alias MIN_VAL: Scalar[dtype] = attributes["min_val"].value().to_scalar[ + dtype + ]() + + alias MAX_VAL: Scalar[dtype] = attributes["max_val"].value().to_scalar[ + dtype + ]() + + @always_inline + fn hardtanh_bw[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + return ( + x > x.splat(MIN_VAL.cast[type]()) + and x < x.splat(MAX_VAL.cast[type]()) + ).select[type](1, 0) + + var res_grad = Tensor[dtype](t1_shape) + + elwise_transform[hardtanh_bw](res_grad, t1) + + return res_grad^ + + struct CLIP: @staticmethod fn result_shape(t_shape: TensorShape) -> TensorShape: From 14578fa7ec67f3ba4a5545657fec80f338d5c0df Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Wed, 22 May 2024 08:31:26 -0400 Subject: [PATCH 07/16] Implemented leaky_relu --- basalt/autograd/ops/mlops.mojo | 55 ++++++++++++++++++++++++++++++++++ basalt/autograd/ops/ops.mojo | 8 +++++ basalt/nn/activations.mojo | 10 +++++++ 3 files changed, 73 insertions(+) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 645f3c2c..defe508d 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -162,6 +162,61 @@ struct RELU: return res_grad^ +struct LEAKYRELU: + @staticmethod + fn result_shape(t1_shape: TensorShape) -> TensorShape: + return t1_shape + + @staticmethod + fn forward[ + t1_shape: TensorShape, + attributes: AttributeVector, + ](inout res: Tensor[dtype], t1: Tensor[dtype]): + """Forward operation of leaky_relu.""" + + fn leaky_relu[ + type: DType, + simd_width: Int, + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + var negative_slope = attributes["negative_slope"].value().to_scalar[ + type + ]() + return (x > 0).select(x, x * negative_slope) + + elwise_transform[leaky_relu](res, t1) + + @staticmethod + fn backward[ + ug_shape: TensorShape, + t1_shape: TensorShape, + attributes: AttributeVector, + ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: + """Backward operation of leaky_relu.""" + + @always_inline + fn leaky_relu_bw[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + var negative_slope = attributes["negative_slope"].value().to_scalar[ + type + ]() + + return (x > 0).select[type](1, negative_slope) + + var res_grad = Tensor[dtype](ug_shape) + + @parameter + fn vec_leaky_relu_bw[nelts: Int](idx: Int): + res_grad.store[nelts]( + idx, + leaky_relu_bw(t1.load[nelts](idx)) * ug.load[nelts](idx), + ) + + vectorize[vec_leaky_relu_bw, nelts](ug_shape.num_elements()) + + return res_grad^ + + struct TANH: @staticmethod fn result_shape(t1_shape: TensorShape) -> TensorShape: diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index c0d0db1c..53ace786 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -19,6 +19,7 @@ from .mlops import ( THRESHOLD, SIGMOID, RELU, + LEAKYRELU, TANH, HARDTANH, CLIP, @@ -73,6 +74,7 @@ struct OP(Stringable): alias SLICE = OP(25, "SLICE") alias THRESHOLD = OP(26, "THRESHOLD") alias HARDTANH = OP(27, "HARDTANH") + alias LEAKYRELU = OP(28, "LEAKYRELU") var id: UInt8 var name: Bytes[16] @@ -141,6 +143,8 @@ fn static_result_shape( 
return SIGMOID.result_shape(t1_shape) elif op == OP.RELU: return RELU.result_shape(t1_shape) + elif op == OP.LEAKYRELU: + return LEAKYRELU.result_shape(t1_shape) elif op == OP.TANH: return TANH.result_shape(t1_shape) elif op == OP.HARDTANH: @@ -259,6 +263,8 @@ fn forward_op[ SIGMOID.forward[t1_shape](res, t1) elif op == OP.RELU: RELU.forward[t1_shape](res, t1) + elif op == OP.LEAKYRELU: + LEAKYRELU.forward[t1_shape, attributes](res, t1) elif op == OP.TANH: TANH.forward[t1_shape](res, t1) elif op == OP.HARDTANH: @@ -381,6 +387,8 @@ fn backward_op[ res_grad = SIGMOID.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RELU: res_grad = RELU.backward[ug_shape, t1_shape](ug, t1) + elif op == OP.LEAKYRELU: + res_grad = LEAKYRELU.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.TANH: res_grad = TANH.backward[ug_shape, t1_shape](ug, t1) elif op == OP.HARDTANH: diff --git a/basalt/nn/activations.mojo b/basalt/nn/activations.mojo index b0421212..61ef51d7 100644 --- a/basalt/nn/activations.mojo +++ b/basalt/nn/activations.mojo @@ -23,6 +23,16 @@ fn ReLU(inout g: Graph, input: Symbol) -> Symbol: return g.op(OP.RELU, input) +fn LeakyReLU( + inout g: Graph, input: Symbol, negative_slope: Scalar[dtype] +) -> Symbol: + return g.op( + OP.LEAKYRELU, + input, + attributes=AttributeVector(Attribute("negative_slope", negative_slope)), + ) + + fn Sigmoid(inout g: Graph, input: Symbol) -> Symbol: return g.op(OP.SIGMOID, input) From 362ef21c86b02078104c207d5e02e0aee8fbe295 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 07:41:12 -0400 Subject: [PATCH 08/16] Added leaky_relu tests to test_activations.mojo Next up: test_mlops and test_mlops_torch --- tests/mojo/test_activations.mojo | 50 ++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/mojo/test_activations.mojo b/tests/mojo/test_activations.mojo index 88330b16..eaee7bef 100644 --- a/tests/mojo/test_activations.mojo +++ b/tests/mojo/test_activations.mojo @@ -8,6 +8,7 @@ from basalt.nn import ( Softmax, LogSoftmax, ReLU, + LeakyReLU, Sigmoid, Tanh, Hardtanh, @@ -21,6 +22,9 @@ from tests import assert_tensors_equal alias Activation = fn (inout g: Graph, input: Symbol) -> Symbol alias AxisActivation = fn (inout g: Graph, input: Symbol, axis: Int) -> Symbol +alias LeakyReLUActivation = fn ( + inout g: Graph, input: Symbol, negative_slope: Scalar[dtype] +) -> Symbol alias ThresholdActivation = fn ( inout g: Graph, input: Symbol, @@ -41,6 +45,18 @@ fn create_graph[ return g^ +fn create_graph[ + shape: TensorShape, + func: LeakyReLUActivation, + negative_slope: Scalar[dtype], +]() -> Graph: + var g = Graph() + var x = g.input(shape) + var activation = func(g, x, negative_slope) + g.out(activation) + return g^ + + fn create_graph[shape: TensorShape, func: Activation]() -> Graph: var g = Graph() var x = g.input(shape) @@ -93,6 +109,21 @@ fn test_graph[ assert_equal(len(graph.nodes), nodes) +fn test_graph[ + shape: TensorShape, + func: LeakyReLUActivation, + nodes: Int, + negative_slope: Scalar[dtype], +](input: Tensor[dtype], expected: Tensor[dtype]) raises: + alias graph = create_graph[shape, func, negative_slope]() + + var model = Model[graph](inference_only=True) + var res = model.inference(input)[0] + + assert_tensors_equal["almost"](res, expected) + assert_equal(len(graph.nodes), nodes) + + # TODO: All these overloads feel redundant. 
Find a way to condense them fn test_graph[ shape: TensorShape, @@ -183,6 +214,25 @@ fn test_RELU() raises: test_graph[shape, ReLU, nodes](input, expected) +fn test_LEAKYRELU() raises: + alias negative_slope = 0.1 + + alias shape = TensorShape(2, 3) + alias nodes = 1 + + var input = Tensor[dtype](shape) + + for i in range(6): + input[i] = i - 3 + + var expected = Tensor[dtype](shape) + + for i in range(6): + expected[i] = i - 3 if i - 3 > 0 else negative_slope * (i - 3) + + test_graph[shape, LeakyReLU, nodes, negative_slope](input, expected) + + fn test_SIGMOID() raises: alias shape = TensorShape(2, 3) alias nodes = 1 From b24dd09dc5a19ce4cdee2a1abc0aa7f822fdb541 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 07:42:32 -0400 Subject: [PATCH 09/16] made LeakyReLU importable through basalt.nn --- basalt/nn/__init__.mojo | 1 + 1 file changed, 1 insertion(+) diff --git a/basalt/nn/__init__.mojo b/basalt/nn/__init__.mojo index 855ac8c8..47d56f33 100644 --- a/basalt/nn/__init__.mojo +++ b/basalt/nn/__init__.mojo @@ -10,6 +10,7 @@ from .activations import ( Softmax, LogSoftmax, ReLU, + LeakyReLU, Sigmoid, Tanh, Hardtanh, From 7cece9f29da26ca506cce8a99fc15282922ddeec Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 08:48:00 -0400 Subject: [PATCH 10/16] Implemented threshold test in test_mlops.mojo --- tests/mojo/test_mlops.mojo | 52 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 2ba723e6..127ce0cf 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -1,11 +1,59 @@ from basalt import dtype, nelts from basalt.autograd import OP from basalt.autograd.attributes import AttributeVector, Attribute -from basalt.autograd.ops.mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE +from basalt.autograd.ops.mlops import ( + SIGMOID, + RELU, + THRESHOLD, + LEAKYRELU, + TANH, + HARDTANH, + CLIP, + SQUEEZE, + UNSQUEEZE, +) from basalt.nn import Tensor, TensorShape from basalt.utils.tensorutils import fill -from tests import assert_tensors_equal, test_unary_op, test_unary_op_backward, to_numpy +from tests import ( + assert_tensors_equal, + test_unary_op, + test_unary_op_backward, + to_numpy, +) + + +fn test_THRESHOLD() raises: + alias t1_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + fill(t1, 4.0) + + var expected = Tensor[dtype](2, 3) + fill(expected, 4.0) + + test_unary_op[ + OP.THRESHOLD, + t1_shape, + AttributeVector(Attribute("threshold", 3), Attribute("value", 2)), + ](t1, expected) + + +fn test_backward_THRESHOLD() raises: + alias t1_shape = TensorShape(2, 3) + alias ug_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + var ug: Tensor[dtype] = Tensor[dtype](ug_shape) + fill(ug, 2.0) + + var expected_grad = Tensor[dtype](2, 3) + fill(expected_grad, 0) + + test_unary_op_backward[ + OP.THRESHOLD, + t1_shape, + ug_shape, + AttributeVector(Attribute("threshold", 3), Attribute("value", 2)), + ](t1, ug, expected_grad) fn test_SIGMOID() raises: From 20919eac9f1a4dba0702eafaa64cae6284af2198 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 08:48:41 -0400 Subject: [PATCH 11/16] Implemented LeakyReLU tests in test_mlops --- tests/mojo/test_mlops.mojo | 47 
++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 127ce0cf..584e00e3 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -119,6 +119,53 @@ fn test_backward_RELU() raises: test_unary_op_backward[OP.RELU, t1_shape, ug_shape](t1, ug, expected_grad) +fn test_LEAKYRELU() raises: + alias t1_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + # TODO: When tensors can do slices, this could be changed to two fill functions. + for i in range(3): + t1[i] = 3 + for i in range(3, 6): + t1[i] = -3 + + var expected = Tensor[dtype](2, 3) + for i in range(3): + expected[i] = 3 + for i in range(3, 6): + expected[i] = -0.3 + + test_unary_op[ + OP.LEAKYRELU, + t1_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, expected) + + +fn test_backward_LEAKYRELU() raises: + alias t1_shape = TensorShape(2, 3) + alias ug_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + var ug: Tensor[dtype] = Tensor[dtype](ug_shape) + for i in range(3): + t1[i] = 3 + for i in range(3, 6): + t1[i] = -3 + fill(ug, 5.0) + + var expected_grad = Tensor[dtype](2, 3) + for i in range(3): + expected_grad[i] = 1 * 5.0 + for i in range(3, 6): + expected_grad[i] = 0.1 * 5.0 + + test_unary_op_backward[ + OP.LEAKYRELU, + t1_shape, + ug_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, ug, expected_grad) + + fn test_TANH() raises: alias t1_shape = TensorShape(2, 3) var t1: Tensor[dtype] = Tensor[dtype](t1_shape) From c92d75d0dd191f3c6094420722fed87bb7d8a812 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 08:49:03 -0400 Subject: [PATCH 12/16] Implemented HardTanH tests in test_mlops --- tests/mojo/test_mlops.mojo | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 584e00e3..3ceeabc1 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -189,6 +189,38 @@ fn test_backward_TANH() raises: test_unary_op_backward[OP.TANH, t1_shape, ug_shape](t1, ug, expected_grad) +fn test_HARDTANH() raises: + alias t1_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + + var expected = Tensor[dtype](2, 3) + fill(expected, 0.0) + + test_unary_op[ + OP.HARDTANH, + t1_shape, + AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)), + ](t1, expected) + + +fn test_backward_HARDTANH() raises: + alias t1_shape = TensorShape(2, 3) + alias ug_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + var ug: Tensor[dtype] = Tensor[dtype](ug_shape) + fill(ug, 5.0) + + var expected_grad = Tensor[dtype](2, 3) + fill(expected_grad, 0) # 5 > 3, so slope is 0. 
+ + test_unary_op_backward[ + OP.HARDTANH, + t1_shape, + ug_shape, + AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)), + ](t1, ug, expected_grad) + + fn test_CLIP() raises: alias t1_shape = TensorShape(2, 3) var t1: Tensor[dtype] = Tensor[dtype](t1_shape) From 3442d783aefbf001fb5d09b5d867c01472479db2 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 08:49:43 -0400 Subject: [PATCH 13/16] Autogenerated formatting changes to test_mlops --- tests/mojo/test_mlops.mojo | 300 ++++++++++++++++++++++++++----------- 1 file changed, 213 insertions(+), 87 deletions(-) diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 3ceeabc1..fe3ddb5d 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -78,7 +78,9 @@ fn test_backward_SIGMOID() raises: expected_grad, 5.0 * 0.25 ) # 0.25 = d(sigmoid(0))/dx = sigmoid(0) * (1 - sigmoid(0)) - test_unary_op_backward[OP.SIGMOID, t1_shape, ug_shape](t1, ug, expected_grad) + test_unary_op_backward[OP.SIGMOID, t1_shape, ug_shape]( + t1, ug, expected_grad + ) fn test_RELU() raises: @@ -237,7 +239,9 @@ fn test_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_min[i] = val if (val > -1.1) else -1.1 - test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr)](t1, expected_min) + test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr)]( + t1, expected_min + ) # Clip with max alias max_attr = Attribute("max", 1.1) @@ -245,7 +249,9 @@ fn test_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_max[i] = val if (val < 1.1) else 1.1 - test_unary_op[OP.CLIP, t1_shape, AttributeVector(max_attr)](t1, expected_max) + test_unary_op[OP.CLIP, t1_shape, AttributeVector(max_attr)]( + t1, expected_max + ) # Clip with min and max var expected = Tensor[dtype](2, 3) @@ -257,7 +263,9 @@ fn test_CLIP() raises: expected[i] = 1.1 else: expected[i] = val - test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr, max_attr)](t1, expected) + test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr, max_attr)]( + t1, expected + ) fn test_backward_CLIP() raises: @@ -279,7 +287,9 @@ fn test_backward_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_min[i] = 5.0 if (val > -1.1) else 0.0 - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, min_attr](t1, ug, expected_min) + test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, min_attr]( + t1, ug, expected_min + ) # Clip with max alias max_attr = AttributeVector(Attribute("max", 1.1)) @@ -287,7 +297,9 @@ fn test_backward_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_max[i] = 5.0 if (val < 1.1) else 0.0 - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, max_attr](t1, ug, expected_max) + test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, max_attr]( + t1, ug, expected_max + ) # Clip with min and max alias attrs = AttributeVector(Attribute("min", -1.1), Attribute("max", 1.1)) @@ -328,7 +340,9 @@ fn test_SQUEEZE() raises: expected = Tensor[dtype](1, 2, 3) fill(expected, 5.0) test_unary_op[ - OP.SQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(2, 4))) + OP.SQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(2, 4))), ](t1, expected) @@ -343,7 +357,9 @@ fn test_backward_SQUEEZE() raises: var expected_grad = Tensor[dtype](2, 1, 3, 1) fill(expected_grad, 5.0) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape](t1, ug, expected_grad) + test_unary_op_backward[OP.SQUEEZE, t1_shape, 
ug_shape]( + t1, ug, expected_grad + ) fn test_UNSQUEEZE() raises: @@ -355,26 +371,34 @@ fn test_UNSQUEEZE() raises: var expected = Tensor[dtype](2, 1, 3, 1) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(1, 3))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(1, 3))), ](t1, expected) expected = Tensor[dtype](2, 1, 3) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(1))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(1))), ](t1, expected) expected = Tensor[dtype](1, 2, 3) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(-3))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(-3))), ](t1, expected) expected = Tensor[dtype](2, 1, 3, 1) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(-1, -3))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(-1, -3))), ](t1, expected) @@ -389,7 +413,9 @@ fn test_backward_UNSQUEEZE() raises: var expected_grad = Tensor[dtype](2, 3) fill(expected_grad, 5.0) - test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape](t1, ug, expected_grad) + test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape]( + t1, ug, expected_grad + ) fn test_SLICE() raises: @@ -397,7 +423,7 @@ fn test_SLICE() raises: var t1: Tensor[dtype] = Tensor[dtype](t1_shape) for i in range(t1.num_elements()): t1[i] = i - + alias slice = Slice(1, 3, 1) # dim = 0 @@ -405,15 +431,17 @@ fn test_SLICE() raises: for i in range(2): for j in range(4): for k in range(5): - expected_0[i*4*5 + j*5 + k] = (i + 1) * 4 * 5 + j * 5 + k + expected_0[i * 4 * 5 + j * 5 + k] = (i + 1) * 4 * 5 + j * 5 + k test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t1, expected_0) # dim = 1 @@ -421,15 +449,17 @@ fn test_SLICE() raises: for i in range(3): for j in range(2): for k in range(5): - expected_1[i*2*5 + j*5 + k] = i * 4 * 5 + (j + 1) * 5 + k + expected_1[i * 2 * 5 + j * 5 + k] = i * 4 * 5 + (j + 1) * 5 + k test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, expected_1) # dim = 2 @@ -437,15 +467,17 @@ fn test_SLICE() raises: for i in range(3): for j in range(4): for k in range(2): - expected_2[i*4*2 + j*2 + k] = i * 4 * 5 + j * 5 + (k + 1) - + expected_2[i * 4 * 2 + j * 2 + k] = i * 4 * 5 + j * 5 + (k + 1) + test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t1, expected_2) @@ -462,15 +494,19 @@ fn test_SLICE_step() raises: for i in range(3): for j in range(2): for k in range(2): - expected_0[i*2*2 + j*2 + k] = (i*2 + 1) * 2 * 2 + j * 2 + k + expected_0[i * 2 * 2 + j * 2 + k] = ( + (i * 2 + 1) * 2 * 
2 + j * 2 + k + ) test_unary_op[ - OP.SLICE, t0_shape, AttributeVector( + OP.SLICE, + t0_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t0, expected_0) # dim = 1 @@ -483,15 +519,19 @@ fn test_SLICE_step() raises: for i in range(2): for j in range(3): for k in range(2): - expected_1[i*3*2 + j*2 + k] = i * 10 * 2 + (j*2 + 1) * 2 + k + expected_1[i * 3 * 2 + j * 2 + k] = ( + i * 10 * 2 + (j * 2 + 1) * 2 + k + ) test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, expected_1) # dim = 2 @@ -504,15 +544,19 @@ fn test_SLICE_step() raises: for i in range(2): for j in range(2): for k in range(3): - expected_2[i*2*3 + j*3 + k] = i * 2 * 10 + j * 10 + (k*2 + 1) + expected_2[i * 2 * 3 + j * 3 + k] = ( + i * 2 * 10 + j * 10 + (k * 2 + 1) + ) test_unary_op[ - OP.SLICE, t2_shape, AttributeVector( + OP.SLICE, + t2_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t2, expected_2) @@ -529,15 +573,19 @@ fn test_SLICE_neg() raises: for i in range(3): for j in range(2): for k in range(2): - expected_0[i*2*2 + j*2 + k] = StaticIntTuple[3](6, 4, 2)[i] * 2 * 2 + j * 2 + k + expected_0[i * 2 * 2 + j * 2 + k] = ( + StaticIntTuple[3](6, 4, 2)[i] * 2 * 2 + j * 2 + k + ) test_unary_op[ - OP.SLICE, t0_shape, AttributeVector( + OP.SLICE, + t0_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t0, expected_0) # dim = 1 @@ -550,15 +598,19 @@ fn test_SLICE_neg() raises: for i in range(2): for j in range(3): for k in range(2): - expected_1[i*3*2 + j*2 + k] = i * 10 * 2 + StaticIntTuple[3](6, 4, 2)[j] * 2 + k + expected_1[i * 3 * 2 + j * 2 + k] = ( + i * 10 * 2 + StaticIntTuple[3](6, 4, 2)[j] * 2 + k + ) test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, expected_1) # dim = 2 @@ -571,15 +623,19 @@ fn test_SLICE_neg() raises: for i in range(2): for j in range(2): for k in range(3): - expected_2[i*2*3 + j*3 + k] = i * 2 * 10 + j * 10 + StaticIntTuple[3](6, 4, 2)[k] + expected_2[i * 2 * 3 + j * 3 + k] = ( + i * 2 * 10 + j * 10 + StaticIntTuple[3](6, 4, 2)[k] + ) test_unary_op[ - OP.SLICE, t2_shape, AttributeVector( + OP.SLICE, + t2_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t2, expected_2) @@ -597,22 +653,35 @@ fn test_SLICE_multiple_axes() raises: for i in range(3): for j in range(3): for k in range(5): - expected[i*3*5 + j*5 + 
k] = StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + StaticIntTuple[3](3, 6, 9)[j] * 40 + StaticIntTuple[5](5, 7, 9, 11, 13)[k] - + expected[i * 3 * 5 + j * 5 + k] = ( + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + + StaticIntTuple[3](3, 6, 9)[j] * 40 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] + ) + test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( - Attribute("starts", TensorShape(slice_0.start, slice_1.start, slice_2.start)), - Attribute("ends", TensorShape(slice_0.end, slice_1.end, slice_2.end)), - Attribute("steps", TensorShape(slice_0.step, slice_1.step, slice_2.step)), + OP.SLICE, + t1_shape, + AttributeVector( + Attribute( + "starts", + TensorShape(slice_0.start, slice_1.start, slice_2.start), + ), + Attribute( + "ends", TensorShape(slice_0.end, slice_1.end, slice_2.end) + ), + Attribute( + "steps", TensorShape(slice_0.step, slice_1.step, slice_2.step) + ), # Attribute("axes", TensorShape(0, 1, 2)) - ) + ), ](t1, expected) alias t2_shape = TensorShape(20, 32, 40, 50) var t2: Tensor[dtype] = Tensor[dtype](t2_shape) for i in range(t2.num_elements()): t2[i] = i - + alias slice_2_1 = Slice(1, 6, 2) alias slice_2_2 = Slice(3, 10, 3) alias slice_2_3 = Slice(5, 15, 2) @@ -624,14 +693,42 @@ fn test_SLICE_multiple_axes() raises: for j in range(3): for k in range(5): for l in range(4): - expected_2[i*3*5*4 + j*5*4 + k*4 + l] = StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 * 50 + StaticIntTuple[3](3, 6, 9)[j] * 40 * 50 + StaticIntTuple[5](5, 7, 9, 11, 13)[k] * 50 + StaticIntTuple[4](7, 11, 15, 19)[l] - + expected_2[i * 3 * 5 * 4 + j * 5 * 4 + k * 4 + l] = ( + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 * 50 + + StaticIntTuple[3](3, 6, 9)[j] * 40 * 50 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] * 50 + + StaticIntTuple[4](7, 11, 15, 19)[l] + ) + test_unary_op[ - OP.SLICE, t2_shape, AttributeVector( - Attribute("starts", TensorShape(slice_2_1.start, slice_2_2.start, slice_2_3.start, slice_2_4.start)), - Attribute("ends", TensorShape(slice_2_1.end, slice_2_2.end, slice_2_3.end, slice_2_4.end)), - Attribute("steps", TensorShape(slice_2_1.step, slice_2_2.step, slice_2_3.step, slice_2_4.step)), - ) + OP.SLICE, + t2_shape, + AttributeVector( + Attribute( + "starts", + TensorShape( + slice_2_1.start, + slice_2_2.start, + slice_2_3.start, + slice_2_4.start, + ), + ), + Attribute( + "ends", + TensorShape( + slice_2_1.end, slice_2_2.end, slice_2_3.end, slice_2_4.end + ), + ), + Attribute( + "steps", + TensorShape( + slice_2_1.step, + slice_2_2.step, + slice_2_3.step, + slice_2_4.step, + ), + ), + ), ](t2, expected_2) @@ -650,15 +747,18 @@ fn test_backward_SLICE() raises: for i in range(2): for j in range(4): for k in range(5): - expected_ug0[(i+1)*4*5 + j*5 + k] = 1.0 + expected_ug0[(i + 1) * 4 * 5 + j * 5 + k] = 1.0 test_unary_op_backward[ - OP.SLICE, t0_shape, ug0_shape, AttributeVector( + OP.SLICE, + t0_shape, + ug0_shape, + AttributeVector( Attribute("starts", TensorShape(slice_0.start)), Attribute("ends", TensorShape(slice_0.end)), Attribute("steps", TensorShape(slice_0.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t0, ug0, expected_ug0) # dim = 1 (step = 2) @@ -670,20 +770,23 @@ fn test_backward_SLICE() raises: alias ug1_shape = TensorShape(2, 3, 2) var ug1: Tensor[dtype] = Tensor[dtype](ug1_shape) fill(ug1, 1.0) - + var expected_ug1 = Tensor[dtype](t1_shape) for i in range(2): for j in range(3): for k in range(2): - expected_ug1[i*10*2 + (j*2 + 1)*2 + k] = 1.0 + expected_ug1[i * 10 * 2 + (j * 2 + 1) * 2 + k] = 1.0 test_unary_op_backward[ - 
OP.SLICE, t1_shape, ug1_shape, AttributeVector( + OP.SLICE, + t1_shape, + ug1_shape, + AttributeVector( Attribute("starts", TensorShape(slice_1.start)), Attribute("ends", TensorShape(slice_1.end)), Attribute("steps", TensorShape(slice_1.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, ug1, expected_ug1) # dim = 2 (step = -2) @@ -700,15 +803,20 @@ fn test_backward_SLICE() raises: for i in range(2): for j in range(2): for k in range(3): - expected_ug2[i*2*10 + j*10 + StaticIntTuple[3](6, 4, 2)[k]] = 1.0 + expected_ug2[ + i * 2 * 10 + j * 10 + StaticIntTuple[3](6, 4, 2)[k] + ] = 1.0 test_unary_op_backward[ - OP.SLICE, t2_shape, ug2_shape, AttributeVector( + OP.SLICE, + t2_shape, + ug2_shape, + AttributeVector( Attribute("starts", TensorShape(slice_2.start)), Attribute("ends", TensorShape(slice_2.end)), Attribute("steps", TensorShape(slice_2.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t2, ug2, expected_ug2) @@ -726,8 +834,12 @@ fn test_backward_SLICE_multiple_axes() raises: for i in range(3): for j in range(3): for k in range(5): - expected[i*3*5 + j*5 + k] = StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + StaticIntTuple[3](3, 6, 9)[j] * 40 + StaticIntTuple[5](5, 7, 9, 11, 13)[k] - + expected[i * 3 * 5 + j * 5 + k] = ( + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + + StaticIntTuple[3](3, 6, 9)[j] * 40 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] + ) + alias ug_shape = TensorShape(3, 3, 5) var ug: Tensor[dtype] = Tensor[dtype](ug_shape) fill(ug, 1.0) @@ -736,14 +848,28 @@ fn test_backward_SLICE_multiple_axes() raises: for i in range(3): for j in range(3): for k in range(5): - expected_ug[StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + StaticIntTuple[3](3, 6, 9)[j] * 40 + StaticIntTuple[5](5, 7, 9, 11, 13)[k]] = 1.0 + expected_ug[ + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + + StaticIntTuple[3](3, 6, 9)[j] * 40 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] + ] = 1.0 test_unary_op_backward[ - OP.SLICE, t1_shape, ug_shape, AttributeVector( - Attribute("starts", TensorShape(slice_0.start, slice_1.start, slice_2.start)), - Attribute("ends", TensorShape(slice_0.end, slice_1.end, slice_2.end)), - Attribute("steps", TensorShape(slice_0.step, slice_1.step, slice_2.step)), - ) + OP.SLICE, + t1_shape, + ug_shape, + AttributeVector( + Attribute( + "starts", + TensorShape(slice_0.start, slice_1.start, slice_2.start), + ), + Attribute( + "ends", TensorShape(slice_0.end, slice_1.end, slice_2.end) + ), + Attribute( + "steps", TensorShape(slice_0.step, slice_1.step, slice_2.step) + ), + ), ](t1, ug, expected_ug) From 62ec6a46b7c0f639747e155891723dc7679ab63e Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Fri, 31 May 2024 07:12:06 -0400 Subject: [PATCH 14/16] Added torch compatibility test for leaky_relu --- tests/python/test_mlops_torch.mojo | 207 +++++++++++++++++++++-------- 1 file changed, 154 insertions(+), 53 deletions(-) diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo index 2f4747cb..a5d3b27f 100644 --- a/tests/python/test_mlops_torch.mojo +++ b/tests/python/test_mlops_torch.mojo @@ -47,6 +47,11 @@ fn torch_unary_op( expected = torch.sigmoid(input_1) elif op == OP.RELU: expected = torch.relu(input_1) + elif op == OP.LEAKYRELU: + expected = torch.nn.functional.leaky_relu( + input_1, + attrs.value()["negative_slope"].value().to_scalar[dtype](), + ) elif op == OP.TANH: expected = torch.tanh(input_1) elif op == 
OP.CLIP: @@ -65,7 +70,9 @@ fn torch_unary_op( var dim = attrs["dims"] if dim: - expected = torch.squeeze(input_1, dim=dim.value().to_shape()[0]) + expected = torch.squeeze( + input_1, dim=dim.value().to_shape()[0] + ) else: expected = torch.squeeze(input_1) elif attrs_tuple: @@ -78,7 +85,9 @@ fn torch_unary_op( var dim = attrs["dims"] if dim: - expected = torch.unsqueeze(input_1, dim=dim.value().to_shape()[0]) + expected = torch.unsqueeze( + input_1, dim=dim.value().to_shape()[0] + ) else: expected = torch.unsqueeze(input_1, 0) elif attrs_tuple: @@ -102,11 +111,11 @@ fn torch_unary_op( if step < 0: flip_dims.append(dim) - step = step *- 1 + step = step * -1 end, start = (end + 1) * -1, (start + 1) * -1 indices[dim] = py.slice(start, end, step) - + expected = input_1.flip(flip_dims)[indices] else: print("Error: op not supported (returning the value input_1): ", op) @@ -159,6 +168,31 @@ fn test_RELU() raises: ) +fn test_LEAKYRELU() raises: + alias t1_shape = TensorShape(37, 63, 107) + alias ug_shape = TensorShape(37, 63, 107) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + rand(t1.data(), t1.num_elements()) + + var ug = Tensor[dtype](ug_shape) + rand(ug.data(), ug.num_elements()) + + var expected_and_grad = torch_unary_op( + OP.LEAKYRELU, t1, ug, AttributeVector(Attribute("negative_slope", 0.1)) + ) + test_unary_op[ + OP.LEAKYRELU, + t1_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, expected_and_grad.expected) + test_unary_op_backward[ + OP.LEAKYRELU, + t1_shape, + ug_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, ug, expected_and_grad.grad_1) + + fn test_TANH() raises: alias t1_shape = TensorShape(37, 63, 107) alias ug_shape = TensorShape(37, 63, 107) @@ -193,23 +227,27 @@ fn test_CLIP() raises: # Clip with min alias min_attr = Attribute("min", 0.3333) - expected_and_grad = torch_unary_op(OP.CLIP, t1, ug, AttributeVector(min_attr)) + expected_and_grad = torch_unary_op( + OP.CLIP, t1, ug, AttributeVector(min_attr) + ) test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, AttributeVector(min_attr)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.CLIP, t1_shape, ug_shape, AttributeVector(min_attr) + ](t1, ug, expected_and_grad.grad_1) # Clip with max alias max_attr = Attribute("max", 0.6666) - expected_and_grad = torch_unary_op(OP.CLIP, t1, ug, AttributeVector(max_attr)) + expected_and_grad = torch_unary_op( + OP.CLIP, t1, ug, AttributeVector(max_attr) + ) test_unary_op[OP.CLIP, t1_shape, AttributeVector(max_attr)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, AttributeVector(max_attr)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.CLIP, t1_shape, ug_shape, AttributeVector(max_attr) + ](t1, ug, expected_and_grad.grad_1) # Clip with min and max expected_and_grad = torch_unary_op( @@ -249,9 +287,9 @@ fn test_SQUEEZE() raises: test_unary_op[OP.SQUEEZE, t1_shape, AttributeVector(dim)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape_1, AttributeVector(dim)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.SQUEEZE, t1_shape, ug_shape_1, AttributeVector(dim) + ](t1, ug, expected_and_grad.grad_1) alias ug_shape_2 = TensorShape(20, 28, 1) ug = Tensor[dtype](ug_shape_2) @@ -259,13 +297,15 @@ fn test_SQUEEZE() raises: alias dim_2 = Attribute("dims", TensorShape(1)) - expected_and_grad = torch_unary_op(OP.SQUEEZE, 
t1, ug, AttributeVector(dim_2)) + expected_and_grad = torch_unary_op( + OP.SQUEEZE, t1, ug, AttributeVector(dim_2) + ) test_unary_op[OP.SQUEEZE, t1_shape, AttributeVector(dim_2)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape_2, AttributeVector(dim_2)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.SQUEEZE, t1_shape, ug_shape_2, AttributeVector(dim_2) + ](t1, ug, expected_and_grad.grad_1) # Squeeze with multiple dims ug = Tensor[dtype](ug_shape) @@ -282,9 +322,9 @@ fn test_SQUEEZE() raises: test_unary_op[OP.SQUEEZE, t1_shape, AttributeVector(dims)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape, AttributeVector(dims)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.SQUEEZE, t1_shape, ug_shape, AttributeVector(dims) + ](t1, ug, expected_and_grad.grad_1) fn test_UNSQUEEZE() raises: @@ -298,13 +338,15 @@ fn test_UNSQUEEZE() raises: alias dim = Attribute("dims", TensorShape(1)) - var expected_and_grad = torch_unary_op(OP.UNSQUEEZE, t1, ug, AttributeVector(dim)) + var expected_and_grad = torch_unary_op( + OP.UNSQUEEZE, t1, ug, AttributeVector(dim) + ) test_unary_op[OP.UNSQUEEZE, t1_shape, AttributeVector(dim)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape, AttributeVector(dim)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.UNSQUEEZE, t1_shape, ug_shape, AttributeVector(dim) + ](t1, ug, expected_and_grad.grad_1) # Unsqueeze with multiple dims alias ug_shape_2 = TensorShape(20, 1, 28, 1) @@ -321,9 +363,9 @@ fn test_UNSQUEEZE() raises: test_unary_op[OP.UNSQUEEZE, t1_shape, AttributeVector(dims)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape_2, AttributeVector(dims)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.UNSQUEEZE, t1_shape, ug_shape_2, AttributeVector(dims) + ](t1, ug, expected_and_grad.grad_1) fn test_SLICE() raises: @@ -337,17 +379,23 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_0.start)), Attribute("ends", TensorShape(slice_0.end)), Attribute("steps", TensorShape(slice_0.step)), - Attribute("axes", TensorShape(0)) + Attribute("axes", TensorShape(0)), ) alias ug_shape = TensorShape(65, 322, 317) var ug = Tensor[dtype](ug_shape) rand(ug.data(), ug.num_elements()) - var attrs_tuple_0 = PythonObject((slice_0.start, slice_0.end, slice_0.step, 0)) - var expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0) + var attrs_tuple_0 = PythonObject( + (slice_0.start, slice_0.end, slice_0.step, 0) + ) + var expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0 + ) test_unary_op[OP.SLICE, t1_shape, attrs_0](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape, attrs_0](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape, attrs_0]( + t1, ug, expected_and_grad.grad_1 + ) # dim = 1 alias slice_1 = Slice(10, 311, 5) @@ -355,17 +403,23 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_1.start)), Attribute("ends", TensorShape(slice_1.end)), Attribute("steps", TensorShape(slice_1.step)), - Attribute("axes", TensorShape(1)) + Attribute("axes", TensorShape(1)), ) alias ug_shape_1 = TensorShape(430, 61, 317) ug = Tensor[dtype](ug_shape_1) rand(ug.data(), ug.num_elements()) - var attrs_tuple_1 = PythonObject((slice_1.start, slice_1.end, slice_1.step, 1)) - expected_and_grad = 
torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_1) + var attrs_tuple_1 = PythonObject( + (slice_1.start, slice_1.end, slice_1.step, 1) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_1 + ) test_unary_op[OP.SLICE, t1_shape, attrs_1](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_1, attrs_1](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_1, attrs_1]( + t1, ug, expected_and_grad.grad_1 + ) # dim = 2 alias slice_2 = Slice(293, 33, -7) @@ -373,20 +427,26 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_2.start)), Attribute("ends", TensorShape(slice_2.end)), Attribute("steps", TensorShape(slice_2.step)), - Attribute("axes", TensorShape(2)) + Attribute("axes", TensorShape(2)), ) alias ug_shape_2 = TensorShape(430, 322, 38) ug = Tensor[dtype](ug_shape_2) rand(ug.data(), ug.num_elements()) - var attrs_tuple_2 = PythonObject((slice_2.start, slice_2.end, slice_2.step, 2)) - expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_2) + var attrs_tuple_2 = PythonObject( + (slice_2.start, slice_2.end, slice_2.step, 2) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_2 + ) test_unary_op[OP.SLICE, t1_shape, attrs_2](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_2, attrs_2](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_2, attrs_2]( + t1, ug, expected_and_grad.grad_1 + ) # Multiple dims - + # dim = 0, 1 alias slice_0_1 = Slice(23, 340, 3) alias slice_1_1 = Slice(10, 250, 5) @@ -395,17 +455,32 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_0_1.start, slice_1_1.start)), Attribute("ends", TensorShape(slice_0_1.end, slice_1_1.end)), Attribute("steps", TensorShape(slice_0_1.step, slice_1_1.step)), - Attribute("axes", TensorShape(0, 1)) + Attribute("axes", TensorShape(0, 1)), ) alias ug_shape_0_1 = TensorShape(106, 48, 317) ug = Tensor[dtype](ug_shape_0_1) rand(ug.data(), ug.num_elements()) - var attrs_tuple_0_1 = PythonObject((slice_0_1.start, slice_0_1.end, slice_0_1.step, 0, slice_1_1.start, slice_1_1.end, slice_1_1.step, 1)) - expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_1) + var attrs_tuple_0_1 = PythonObject( + ( + slice_0_1.start, + slice_0_1.end, + slice_0_1.step, + 0, + slice_1_1.start, + slice_1_1.end, + slice_1_1.step, + 1, + ) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_1 + ) test_unary_op[OP.SLICE, t1_shape, attrs_0_1](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_1, attrs_0_1](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_1, attrs_0_1]( + t1, ug, expected_and_grad.grad_1 + ) # dim = 0, 1, 2 alias slice_0_2 = Slice(-412, -5, 3) @@ -413,20 +488,46 @@ fn test_SLICE() raises: alias slice_2_2 = Slice(293, 33, -7) alias attrs_0_2 = AttributeVector( - Attribute("starts", TensorShape(slice_0_2.start, slice_1_2.start, slice_2_2.start)), - Attribute("ends", TensorShape(slice_0_2.end, slice_1_2.end, slice_2_2.end)), - Attribute("steps", TensorShape(slice_0_2.step, slice_1_2.step, slice_2_2.step)), - Attribute("axes", TensorShape(0, 1, 2)) + Attribute( + "starts", + TensorShape(slice_0_2.start, slice_1_2.start, slice_2_2.start), + ), + Attribute( + "ends", TensorShape(slice_0_2.end, slice_1_2.end, slice_2_2.end) + ), + Attribute( + "steps", 
TensorShape(slice_0_2.step, slice_1_2.step, slice_2_2.step) + ), + Attribute("axes", TensorShape(0, 1, 2)), ) alias ug_shape_0_2 = TensorShape(136, 35, 38) ug = Tensor[dtype](ug_shape_0_2) rand(ug.data(), ug.num_elements()) - var attrs_tuple_0_2 = PythonObject((slice_0_2.start, slice_0_2.end, slice_0_2.step, 0, slice_1_2.start, slice_1_2.end, slice_1_2.step, 1, slice_2_2.start, slice_2_2.end, slice_2_2.step, 2)) - expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_2) + var attrs_tuple_0_2 = PythonObject( + ( + slice_0_2.start, + slice_0_2.end, + slice_0_2.step, + 0, + slice_1_2.start, + slice_1_2.end, + slice_1_2.step, + 1, + slice_2_2.start, + slice_2_2.end, + slice_2_2.step, + 2, + ) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_2 + ) test_unary_op[OP.SLICE, t1_shape, attrs_0_2](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_2, attrs_0_2](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_2, attrs_0_2]( + t1, ug, expected_and_grad.grad_1 + ) fn main(): From 871371f712a9da20f5ad8c0bb610c48224640be4 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Fri, 31 May 2024 10:28:53 -0400 Subject: [PATCH 15/16] Added torch compatibility test for hard tanh --- tests/python/test_mlops_torch.mojo | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo index a5d3b27f..2cdbf043 100644 --- a/tests/python/test_mlops_torch.mojo +++ b/tests/python/test_mlops_torch.mojo @@ -54,6 +54,12 @@ fn torch_unary_op( ) elif op == OP.TANH: expected = torch.tanh(input_1) + elif op == OP.HARDTANH: + expected = torch.nn.functional.hardtanh( + input_1, + min_val=attrs.value()["min_val"].value().to_scalar[dtype](), + max_val=attrs.value()["max_val"].value().to_scalar[dtype](), + ) elif op == OP.CLIP: var min_attr = attrs.value()["min"] var max_attr = attrs.value()["max"] @@ -209,6 +215,29 @@ fn test_TANH() raises: ) +fn test_HARDTANH() raises: + alias t1_shape = TensorShape(37, 63, 107) + alias ug_shape = TensorShape(37, 63, 107) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + rand(t1.data(), t1.num_elements()) + + var ug = Tensor[dtype](ug_shape) + rand(ug.data(), ug.num_elements()) + + var expected_and_grad = torch_unary_op(OP.HARDTANH, t1, ug) + test_unary_op[ + OP.HARDTANH, + t1_shape, + AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)), + ](t1, expected_and_grad.expected) + test_unary_op_backward[ + OP.HARDTANH, + t1_shape, + ug_shape, + AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)), + ](t1, ug, expected_and_grad.grad_1) + + fn test_CLIP() raises: alias t1_shape = TensorShape(37, 63, 107) alias ug_shape = TensorShape(37, 63, 107) From 083e2918d7636d83ddb7189677596a7c8706e276 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Fri, 7 Jun 2024 06:46:52 -0400 Subject: [PATCH 16/16] Removed broken threshold and hardtanh activation functions --- basalt/autograd/ops/mlops.mojo | 125 ----------------------------- basalt/autograd/ops/ops.mojo | 16 ---- basalt/nn/__init__.mojo | 2 - basalt/nn/activations.mojo | 30 ------- tests/mojo/test_activations.mojo | 87 -------------------- tests/mojo/test_mlops.mojo | 67 ---------------- tests/python/test_mlops_torch.mojo | 31 +------ 7 files changed, 1 insertion(+), 357 deletions(-) diff 
--git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index defe508d..c9220506 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -7,65 +7,6 @@ from basalt.utils.tensorutils import elwise_transform from basalt.autograd.attributes import Attribute, AttributeVector -struct THRESHOLD: - @staticmethod - fn result_shape(t1_shape: TensorShape) -> TensorShape: - return t1_shape - - @staticmethod - fn forward[ - t1_shape: TensorShape, - attributes: AttributeVector, - ](inout res: Tensor[dtype], t1: Tensor[dtype]): - """Forward pass for threshold.""" - - alias THRESHOLD: Scalar[dtype] = attributes[ - "threshold" - ].value().to_scalar[dtype]() - - alias VALUE: Scalar[dtype] = attributes["value"].value().to_scalar[ - dtype - ]() - - @always_inline - fn threshold[ - type: DType, simd_width: Int - ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: - alias casted_threshold = THRESHOLD.cast[type]() - alias casted_value = VALUE.cast[type]() - - return (x > x.splat(casted_threshold)).select[type](x, casted_value) - - elwise_transform[threshold](res, t1) - - @staticmethod - fn backward[ - ug_shape: TensorShape, - t1_shape: TensorShape, - attributes: AttributeVector, - ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: - """Backward pass for threshold.""" - alias THRESHOLD: Scalar[dtype] = attributes[ - "threshold" - ].value().to_scalar[dtype]() - - alias VALUE: Scalar[dtype] = attributes["value"].value().to_scalar[ - dtype - ]() - - @always_inline - fn threshold_bw[ - type: DType, simd_width: Int - ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: - return (x > x.splat(THRESHOLD.cast[type]())).select[type](1, 0) - - var res_grad = Tensor[dtype](t1_shape) - - elwise_transform[threshold_bw](res_grad, t1) - - return res_grad^ - - @value struct SIGMOID: @staticmethod @@ -263,72 +204,6 @@ struct TANH: return res_grad^ -struct HARDTANH: - @staticmethod - fn result_shape(t1_shape: TensorShape) -> TensorShape: - return t1_shape - - @staticmethod - fn forward[ - t1_shape: TensorShape, - attributes: AttributeVector, - ](inout res: Tensor[dtype], t1: Tensor[dtype]): - """Forward pass for hard tanh.""" - - alias MIN_VAL: Scalar[dtype] = attributes["min_val"].value().to_scalar[ - dtype - ]() - - alias MAX_VAL: Scalar[dtype] = attributes["max_val"].value().to_scalar[ - dtype - ]() - - @always_inline - fn hardtanh[ - type: DType, simd_width: Int - ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: - alias casted_min = MIN_VAL.cast[type]() - alias casted_max = MAX_VAL.cast[type]() - - var x_or_min = (x > x.splat(casted_min)).select[type](x, casted_min) - - return (x_or_min < x_or_min.splat(casted_max)).select[type]( - x_or_min, casted_max - ) - - elwise_transform[hardtanh](res, t1) - - @staticmethod - fn backward[ - ug_shape: TensorShape, - t1_shape: TensorShape, - attributes: AttributeVector, - ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: - """Backward pass for hard tanh.""" - alias MIN_VAL: Scalar[dtype] = attributes["min_val"].value().to_scalar[ - dtype - ]() - - alias MAX_VAL: Scalar[dtype] = attributes["max_val"].value().to_scalar[ - dtype - ]() - - @always_inline - fn hardtanh_bw[ - type: DType, simd_width: Int - ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: - return ( - x > x.splat(MIN_VAL.cast[type]()) - and x < x.splat(MAX_VAL.cast[type]()) - ).select[type](1, 0) - - var res_grad = Tensor[dtype](t1_shape) - - elwise_transform[hardtanh_bw](res_grad, t1) - - return res_grad^ - - struct CLIP: @staticmethod fn 
result_shape(t_shape: TensorShape) -> TensorShape: diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index 53ace786..24221c5e 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -16,12 +16,10 @@ from .basics import ( FMA, ) from .mlops import ( - THRESHOLD, SIGMOID, RELU, LEAKYRELU, TANH, - HARDTANH, CLIP, SQUEEZE, UNSQUEEZE, @@ -72,8 +70,6 @@ struct OP(Stringable): alias CONCAT = OP(23, "CONCAT", dynamic=True) alias SPLIT = OP(24, "SPLIT", dynamic=True) alias SLICE = OP(25, "SLICE") - alias THRESHOLD = OP(26, "THRESHOLD") - alias HARDTANH = OP(27, "HARDTANH") alias LEAKYRELU = OP(28, "LEAKYRELU") var id: UInt8 @@ -137,8 +133,6 @@ fn static_result_shape( return FLATTEN.result_shape(t1_shape) elif op == OP.RESHAPE: return RESHAPE.result_shape(t1_shape, attributes) - elif op == OP.THRESHOLD: - return THRESHOLD.result_shape(t1_shape) elif op == OP.SIGMOID: return SIGMOID.result_shape(t1_shape) elif op == OP.RELU: @@ -147,8 +141,6 @@ fn static_result_shape( return LEAKYRELU.result_shape(t1_shape) elif op == OP.TANH: return TANH.result_shape(t1_shape) - elif op == OP.HARDTANH: - return HARDTANH.result_shape(t1_shape) elif op == OP.TRANSPOSE: return TRANSPOSE.result_shape(t1_shape, attributes) elif op == OP.MAXPOOL2D: @@ -257,8 +249,6 @@ fn forward_op[ FLATTEN.forward[t1_shape](res, t1) elif op == OP.RESHAPE: RESHAPE.forward[t1_shape](res, t1) - elif op == OP.THRESHOLD: - THRESHOLD.forward[t1_shape, attributes](res, t1) elif op == OP.SIGMOID: SIGMOID.forward[t1_shape](res, t1) elif op == OP.RELU: @@ -267,8 +257,6 @@ fn forward_op[ LEAKYRELU.forward[t1_shape, attributes](res, t1) elif op == OP.TANH: TANH.forward[t1_shape](res, t1) - elif op == OP.HARDTANH: - HARDTANH.forward[t1_shape, attributes](res, t1) elif op == OP.TRANSPOSE: TRANSPOSE.forward[t1_shape, attributes](res, t1) elif op == OP.MAXPOOL2D: @@ -381,8 +369,6 @@ fn backward_op[ res_grad = FLATTEN.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RESHAPE: res_grad = RESHAPE.backward[ug_shape, t1_shape](ug, t1) - elif op == OP.THRESHOLD: - res_grad = THRESHOLD.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.SIGMOID: res_grad = SIGMOID.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RELU: @@ -391,8 +377,6 @@ fn backward_op[ res_grad = LEAKYRELU.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.TANH: res_grad = TANH.backward[ug_shape, t1_shape](ug, t1) - elif op == OP.HARDTANH: - res_grad = HARDTANH.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.TRANSPOSE: res_grad = TRANSPOSE.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.MAXPOOL2D: diff --git a/basalt/nn/__init__.mojo b/basalt/nn/__init__.mojo index 47d56f33..c2a06607 100644 --- a/basalt/nn/__init__.mojo +++ b/basalt/nn/__init__.mojo @@ -13,6 +13,4 @@ from .activations import ( LeakyReLU, Sigmoid, Tanh, - Hardtanh, - Threshold, ) diff --git a/basalt/nn/activations.mojo b/basalt/nn/activations.mojo index 61ef51d7..9a83a0fd 100644 --- a/basalt/nn/activations.mojo +++ b/basalt/nn/activations.mojo @@ -4,21 +4,6 @@ from basalt.autograd.attributes import Attribute, AttributeVector # '''Activation functions.''' -fn Threshold( - inout g: Graph, - input: Symbol, - threshold: Scalar[dtype], - value: Scalar[dtype], -) -> Symbol: - return g.op( - OP.THRESHOLD, - input, - attributes=AttributeVector( - Attribute("threshold", threshold), Attribute("value", value) - ), - ) - - fn ReLU(inout g: Graph, input: Symbol) -> Symbol: return g.op(OP.RELU, input) @@ -41,21 +26,6 @@ fn Tanh(inout g: 
Graph, input: Symbol) -> Symbol: return g.op(OP.TANH, input) -fn Hardtanh( - inout g: Graph, - input: Symbol, - min_val: Scalar[dtype], - max_val: Scalar[dtype], -) -> Symbol: - return g.op( - OP.HARDTANH, - input, - attributes=AttributeVector( - Attribute("min_val", min_val), Attribute("max_val", max_val) - ), - ) - - fn Softmax(inout g: Graph, input: Symbol, axis: Int) -> Symbol: # softmax: exp(x_i) / sum(exp(x_j)) # stable softmax: exp(x_i - max(x_j)) / sum(exp(x_j - max(x_j))) diff --git a/tests/mojo/test_activations.mojo b/tests/mojo/test_activations.mojo index eaee7bef..61a633b8 100644 --- a/tests/mojo/test_activations.mojo +++ b/tests/mojo/test_activations.mojo @@ -11,8 +11,6 @@ from basalt.nn import ( LeakyReLU, Sigmoid, Tanh, - Hardtanh, - Threshold, ) from basalt.autograd import Graph, Symbol from basalt.utils.tensorutils import fill @@ -25,12 +23,6 @@ alias AxisActivation = fn (inout g: Graph, input: Symbol, axis: Int) -> Symbol alias LeakyReLUActivation = fn ( inout g: Graph, input: Symbol, negative_slope: Scalar[dtype] ) -> Symbol -alias ThresholdActivation = fn ( - inout g: Graph, - input: Symbol, - threshold: Scalar[dtype], - value: Scalar[dtype], -) -> Symbol fn create_graph[ @@ -65,35 +57,6 @@ fn create_graph[shape: TensorShape, func: Activation]() -> Graph: return g^ -fn create_graph[ - shape: TensorShape, - func: ThresholdActivation, - threshold: Scalar[dtype], - value: Scalar[dtype], -]() -> Graph: - var g = Graph() - var x = g.input(shape) - var activation = func(g, x, threshold, value) - g.out(activation) - return g^ - - -fn test_graph[ - shape: TensorShape, - func: ThresholdActivation, - nodes: Int, - threshold: Scalar[dtype], - value: Scalar[dtype], -](input: Tensor[dtype], expected: Tensor[dtype]) raises: - alias graph = create_graph[shape, func, threshold, value]() - - var model = Model[graph](inference_only=True) - var res = model.inference(input)[0] - - assert_tensors_equal["almost"](res, expected) - assert_equal(len(graph.nodes), nodes) - - fn test_graph[ shape: TensorShape, func: AxisActivation, @@ -139,26 +102,6 @@ fn test_graph[ assert_equal(len(graph.nodes), nodes, "Node count failed") -fn test_THRESHOLD() raises: - alias shape = TensorShape(2, 3) - alias nodes = 1 - - alias THRESHOLD = 3 - alias VALUE = 2 - - var input = Tensor[dtype](shape) - - for i in range(6): - input[i] = i - - var expected = Tensor[dtype](shape) - - for i in range(6): - expected[i] = i if i > THRESHOLD else VALUE - - test_graph[shape, Threshold, nodes, THRESHOLD, VALUE](input, expected) - - fn test_SOFTMAX() raises: alias shape = TensorShape(2, 3, 2) alias nodes = 5 @@ -259,43 +202,13 @@ fn test_TANH() raises: test_graph[shape, Tanh, nodes](input, expected) -fn test_HARDTANH() raises: - alias shape = TensorShape(3, 3) - alias nodes = 1 - - alias MIN_VAL = -2 - alias MAX_VAL = 2 - - var input = Tensor[dtype](shape) - - for i in range(9): - input[i] = i - 4 - - var expected = Tensor[dtype](shape) - - for j in range(0, 9): - var i = j - 4 - if i < MIN_VAL: - expected[j] = MIN_VAL - - elif i > MAX_VAL: - expected[j] = MAX_VAL - - else: - expected[j] = i - - test_graph[shape, Hardtanh, nodes, MIN_VAL, MAX_VAL](input, expected) - - fn main(): try: - test_THRESHOLD() test_SOFTMAX() test_LOGSOFTMAX() test_RELU() test_SIGMOID() test_TANH() - test_HARDTANH() except e: print("[ERROR] Error in activations") print(e) diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index fe3ddb5d..620f00f1 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -4,10 +4,8 
@@ from basalt.autograd.attributes import AttributeVector, Attribute from basalt.autograd.ops.mlops import ( SIGMOID, RELU, - THRESHOLD, LEAKYRELU, TANH, - HARDTANH, CLIP, SQUEEZE, UNSQUEEZE, @@ -23,39 +21,6 @@ from tests import ( ) -fn test_THRESHOLD() raises: - alias t1_shape = TensorShape(2, 3) - var t1: Tensor[dtype] = Tensor[dtype](t1_shape) - fill(t1, 4.0) - - var expected = Tensor[dtype](2, 3) - fill(expected, 4.0) - - test_unary_op[ - OP.THRESHOLD, - t1_shape, - AttributeVector(Attribute("threshold", 3), Attribute("value", 2)), - ](t1, expected) - - -fn test_backward_THRESHOLD() raises: - alias t1_shape = TensorShape(2, 3) - alias ug_shape = TensorShape(2, 3) - var t1: Tensor[dtype] = Tensor[dtype](t1_shape) - var ug: Tensor[dtype] = Tensor[dtype](ug_shape) - fill(ug, 2.0) - - var expected_grad = Tensor[dtype](2, 3) - fill(expected_grad, 0) - - test_unary_op_backward[ - OP.THRESHOLD, - t1_shape, - ug_shape, - AttributeVector(Attribute("threshold", 3), Attribute("value", 2)), - ](t1, ug, expected_grad) - - fn test_SIGMOID() raises: alias t1_shape = TensorShape(2, 3) var t1: Tensor[dtype] = Tensor[dtype](t1_shape) @@ -191,38 +156,6 @@ fn test_backward_TANH() raises: test_unary_op_backward[OP.TANH, t1_shape, ug_shape](t1, ug, expected_grad) -fn test_HARDTANH() raises: - alias t1_shape = TensorShape(2, 3) - var t1: Tensor[dtype] = Tensor[dtype](t1_shape) - - var expected = Tensor[dtype](2, 3) - fill(expected, 0.0) - - test_unary_op[ - OP.HARDTANH, - t1_shape, - AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)), - ](t1, expected) - - -fn test_backward_HARDTANH() raises: - alias t1_shape = TensorShape(2, 3) - alias ug_shape = TensorShape(2, 3) - var t1: Tensor[dtype] = Tensor[dtype](t1_shape) - var ug: Tensor[dtype] = Tensor[dtype](ug_shape) - fill(ug, 5.0) - - var expected_grad = Tensor[dtype](2, 3) - fill(expected_grad, 0) # 5 > 3, so slope is 0. 
-
-    test_unary_op_backward[
-        OP.HARDTANH,
-        t1_shape,
-        ug_shape,
-        AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)),
-    ](t1, ug, expected_grad)
-
-
 fn test_CLIP() raises:
     alias t1_shape = TensorShape(2, 3)
     var t1: Tensor[dtype] = Tensor[dtype](t1_shape)
diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo
index 2cdbf043..5cba639c 100644
--- a/tests/python/test_mlops_torch.mojo
+++ b/tests/python/test_mlops_torch.mojo
@@ -43,7 +43,7 @@ fn torch_unary_op(
 
     var expected: PythonObject
 
-    if op == OP.SIGMOID:
+    if op == OP.SIGMOID:
         expected = torch.sigmoid(input_1)
     elif op == OP.RELU:
         expected = torch.relu(input_1)
@@ -54,12 +54,6 @@ fn torch_unary_op(
         )
     elif op == OP.TANH:
         expected = torch.tanh(input_1)
-    elif op == OP.HARDTANH:
-        expected = torch.nn.functional.hardtanh(
-            input_1,
-            min_val=attrs.value()["min_val"].value().to_scalar[dtype](),
-            max_val=attrs.value()["max_val"].value().to_scalar[dtype](),
-        )
     elif op == OP.CLIP:
         var min_attr = attrs.value()["min"]
         var max_attr = attrs.value()["max"]
@@ -215,29 +209,6 @@ fn test_TANH() raises:
     )
 
 
-fn test_HARDTANH() raises:
-    alias t1_shape = TensorShape(37, 63, 107)
-    alias ug_shape = TensorShape(37, 63, 107)
-    var t1: Tensor[dtype] = Tensor[dtype](t1_shape)
-    rand(t1.data(), t1.num_elements())
-
-    var ug = Tensor[dtype](ug_shape)
-    rand(ug.data(), ug.num_elements())
-
-    var expected_and_grad = torch_unary_op(OP.HARDTANH, t1, ug)
-    test_unary_op[
-        OP.HARDTANH,
-        t1_shape,
-        AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)),
-    ](t1, expected_and_grad.expected)
-    test_unary_op_backward[
-        OP.HARDTANH,
-        t1_shape,
-        ug_shape,
-        AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)),
-    ](t1, ug, expected_and_grad.grad_1)
-
-
 fn test_CLIP() raises:
     alias t1_shape = TensorShape(37, 63, 107)
     alias ug_shape = TensorShape(37, 63, 107)