From 0e6bc32140f52434a72d1958096fbfd0aba194c3 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 17:05:55 -0400 Subject: [PATCH 01/16] Implemented threshold I implemented the threshold activation function as well as unit tests to verify its functionality. --- basalt/autograd/ops/mlops.mojo | 260 +++++++++++++++++++++++-------- basalt/autograd/ops/ops.mojo | 95 ++++++++--- basalt/nn/__init__.mojo | 2 +- basalt/nn/activations.mojo | 14 ++ tests/mojo/test_activations.mojo | 61 +++++++- 5 files changed, 337 insertions(+), 95 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 08699199..4286420c 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -7,6 +7,64 @@ from basalt.utils.tensorutils import elwise_transform from basalt.autograd.attributes import Attribute, AttributeVector +struct THRESHOLD: + @staticmethod + fn result_shape(t1_shape: TensorShape) -> TensorShape: + return t1_shape + + @staticmethod + fn forward[ + t1_shape: TensorShape, + attributes: AttributeVector, + ](inout res: Tensor[dtype], t1: Tensor[dtype]): + """Forward pass for threshold.""" + + alias THRESHOLD: Scalar[dtype] = attributes[ + "threshold" + ].value().to_scalar[dtype]() + + alias VALUE: Scalar[dtype] = attributes["value"].value().to_scalar[ + dtype + ]() + + @always_inline + fn threshold[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + return (x > x.splat(THRESHOLD.cast[type]())).select[type]( + x, VALUE.cast[type]() + ) # Feels like using AttributeVector made this unnecessarily complicated + + elwise_transform[threshold](res, t1) + + @staticmethod + fn backward[ + ug_shape: TensorShape, + t1_shape: TensorShape, + attributes: AttributeVector, + ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: + """Backward pass for threshold.""" + alias THRESHOLD: Scalar[dtype] = attributes[ + "threshold" + ].value().to_scalar[dtype]() + + alias VALUE: Scalar[dtype] = attributes["value"].value().to_scalar[ + dtype + ]() + + @always_inline + fn threshold[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + return (x > x.splat(THRESHOLD.cast[type]())).select[type](1, 0) + + var res_grad = Tensor[dtype](t1_shape) + + elwise_transform[threshold](res_grad, t1) + + return res_grad^ + + @value struct SIGMOID: @staticmethod @@ -52,7 +110,7 @@ struct SIGMOID: vectorize[vec_sigmoid_bw, nelts](ug_shape.num_elements()) - return res_grad ^ + return res_grad^ struct RELU: @@ -100,7 +158,7 @@ struct RELU: vectorize[vec_relu_bw, nelts](ug_shape.num_elements()) - return res_grad ^ + return res_grad^ struct TANH: @@ -146,7 +204,7 @@ struct TANH: vectorize[vec_tanh_bw, nelts](ug_shape.num_elements()) - return res_grad ^ + return res_grad^ struct CLIP: @@ -164,12 +222,12 @@ struct CLIP: alias min_attr = attributes["min"] alias max_attr = attributes["max"] - var min_val = min_attr.value().to_scalar[dtype]() if min_attr else min_finite[ + var min_val = min_attr.value().to_scalar[ dtype - ]() - var max_val = max_attr.value().to_scalar[dtype]() if max_attr else max_finite[ + ]() if min_attr else min_finite[dtype]() + var max_val = max_attr.value().to_scalar[ dtype - ]() + ]() if max_attr else max_finite[dtype]() @parameter fn vec_clip[nelts: Int](i: Int): @@ -187,12 +245,12 @@ struct CLIP: alias min_attr = attributes["min"] alias max_attr = attributes["max"] - var min_val = min_attr.value().to_scalar[dtype]() if 
min_attr else min_finite[ + var min_val = min_attr.value().to_scalar[ dtype - ]() - var max_val = max_attr.value().to_scalar[dtype]() if max_attr else max_finite[ + ]() if min_attr else min_finite[dtype]() + var max_val = max_attr.value().to_scalar[ dtype - ]() + ]() if max_attr else max_finite[dtype]() var res_grad = Tensor[dtype](t_shape) @@ -201,17 +259,21 @@ struct CLIP: var val = t.load[nelts](i) res_grad.store[nelts]( i, - ((val >= min_val) * (val <= max_val)).select(ug.load[nelts](i), 0), + ((val >= min_val) * (val <= max_val)).select( + ug.load[nelts](i), 0 + ), ) vectorize[vec_clip_bw, nelts, size = t_shape.num_elements()]() - return res_grad ^ + return res_grad^ struct SQUEEZE: @staticmethod - fn result_shape(t1_shape: TensorShape, attributes: AttributeVector) -> TensorShape: + fn result_shape( + t1_shape: TensorShape, attributes: AttributeVector + ) -> TensorShape: var dim = attributes["dims"] var dims_to_squeeze = dim.value().to_shape() if dim else TensorShape() @@ -239,12 +301,14 @@ struct SQUEEZE: ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: var res_grad = Tensor[dtype](t1_shape) memcpy(res_grad.data(), ug.data(), ug.num_elements()) - return res_grad ^ + return res_grad^ struct UNSQUEEZE: @staticmethod - fn result_shape(t1_shape: TensorShape, attributes: AttributeVector) -> TensorShape: + fn result_shape( + t1_shape: TensorShape, attributes: AttributeVector + ) -> TensorShape: var dim = attributes["dims"] var dims_to_squeeze = dim.value().to_shape() if dim else TensorShape() @@ -276,7 +340,7 @@ struct UNSQUEEZE: ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: var res_grad = Tensor[dtype](t1_shape) memcpy(res_grad.data(), ug.data(), ug.num_elements()) - return res_grad ^ + return res_grad^ struct SLICE: @@ -285,7 +349,7 @@ struct SLICE: # Adjust negative indices & ensure they are within bounds. 
var s = slice if slice >= 0 else dim_size + slice return max(min(s, dim_size), 0) - + @staticmethod fn default_starts(shape: TensorShape) -> List[Int]: var starts = List[Int]() @@ -306,7 +370,7 @@ struct SLICE: for i in range(shape.rank()): steps.append(1) return steps^ - + @staticmethod fn default_axes(shape: TensorShape) -> List[Int]: # NOTE: axes can't be negative @@ -316,38 +380,55 @@ struct SLICE: return axes^ @staticmethod - fn result_shape(t1_shape: TensorShape, attributes: AttributeVector) -> TensorShape: + fn result_shape( + t1_shape: TensorShape, attributes: AttributeVector + ) -> TensorShape: # NOTE: Starts and ends have to be of the same size # NOTE: If axes not provided, starts and ends have to be of the same size as t1_shape var starts = attributes["starts"].value().to_shape() var ends = attributes["ends"].value().to_shape() - var steps = attributes["steps"].value().to_shape() if attributes["steps"] else Self.default_steps(starts) - var axes = attributes["axes"].value().to_shape() if attributes["axes"] else Self.default_axes(t1_shape) + var steps = attributes["steps"].value().to_shape() if attributes[ + "steps" + ] else Self.default_steps(starts) + var axes = attributes["axes"].value().to_shape() if attributes[ + "axes" + ] else Self.default_axes(t1_shape) var new_shape = t1_shape for i in range(starts.rank()): var axis = axes[i] - new_shape[axis] = len(range( - start = Self.adjust_boundary(starts[i], t1_shape[axis]), - end = Self.adjust_boundary(ends[i], t1_shape[axis]), - step = steps[i] - )) + new_shape[axis] = len( + range( + start=Self.adjust_boundary(starts[i], t1_shape[axis]), + end=Self.adjust_boundary(ends[i], t1_shape[axis]), + step=steps[i], + ) + ) return new_shape @staticmethod - fn reorder_positions[id: Int](original: TensorShape, axes: TensorShape, t1_shape: TensorShape) -> List[Int]: + fn reorder_positions[ + id: Int + ](original: TensorShape, axes: TensorShape, t1_shape: TensorShape) -> List[ + Int + ]: # Reorder the starts (id=0), ends (id=1) or steps (id=2) to match the order of the axes var updated: List[Int] @parameter - if id == 0: updated = Self.default_starts(t1_shape) - elif id == 1: updated = Self.default_ends(t1_shape) - else: updated = Self.default_steps(t1_shape) - + if id == 0: + updated = Self.default_starts(t1_shape) + elif id == 1: + updated = Self.default_ends(t1_shape) + else: + updated = Self.default_steps(t1_shape) + for i in range(axes.rank()): var axis = axes[i] - updated[axis] = original[i] if id == 2 else Self.adjust_boundary(original[i], t1_shape[axis]) + updated[axis] = original[i] if id == 2 else Self.adjust_boundary( + original[i], t1_shape[axis] + ) return updated^ @@ -360,12 +441,12 @@ struct SLICE: steps: List[Int], starts: List[Int], ends: List[Int], - backward_op: Bool = False + backward_op: Bool = False, ]( inout res: Tensor[dtype], t1: Tensor[dtype], last_dims: Int, - position: Int, + position: Int, last_position: Int, idx: Int, idx_original: Int, @@ -374,7 +455,9 @@ struct SLICE: alias t1_strides = original_shape.strides() var idx_temp = idx - var idx_original_temp = starts[position] * t1_strides[position] + idx_original + var idx_original_temp = starts[position] * t1_strides[ + position + ] + idx_original if position == last_position + 1: # Work on the last dimensions @@ -382,37 +465,51 @@ struct SLICE: alias stride = t1_strides[position] * steps[position] @parameter - fn v_slice[nelts: Int](k : Int): - + fn v_slice[nelts: Int](k: Int): @parameter if not backward_op: + @parameter if steps[position] == 1: - 
res.store[nelts](idx_temp + k, t1.load[nelts](idx_original_temp)) + res.store[nelts]( + idx_temp + k, t1.load[nelts](idx_original_temp) + ) else: res.store[nelts]( idx_temp + k, - t1.data().offset(idx_original_temp).simd_strided_load[nelts](stride) + t1.data() + .offset(idx_original_temp) + .simd_strided_load[nelts](stride), ) else: + @parameter if steps[position] == 1: - res.store[nelts](idx_original_temp, t1.load[nelts](idx_temp + k)) - else: - res.data().offset(idx_original_temp).simd_strided_store[nelts]( - t1.load[nelts](idx_temp + k), - stride + res.store[nelts]( + idx_original_temp, t1.load[nelts](idx_temp + k) ) - + else: + res.data().offset(idx_original_temp).simd_strided_store[ + nelts + ](t1.load[nelts](idx_temp + k), stride) + idx_original_temp += stride * nelts vectorize[v_slice, nelts](last_dims) - return + return for _ in range(shape[position]): - Self.recursive_iters_slice[shape, original_shape, steps, starts, ends, backward_op]( - res, t1, last_dims, position + 1, last_position, idx_temp, idx_original_temp + Self.recursive_iters_slice[ + shape, original_shape, steps, starts, ends, backward_op + ]( + res, + t1, + last_dims, + position + 1, + last_position, + idx_temp, + idx_original_temp, ) idx_temp += strides[position] @@ -425,10 +522,10 @@ struct SLICE: steps: List[Int], starts: List[Int], ends: List[Int], - backward_op: Bool = False + backward_op: Bool = False, ](inout res: Tensor[dtype], t1: Tensor[dtype]): alias strides = original_shape.strides() - + # Get the dimensions for vectorization var last_dims = 1 var positions_to_skip = 0 @@ -439,7 +536,7 @@ struct SLICE: positions_to_skip += 1 if starts[i] != 0 or ends[i] != original_shape[i] or steps[i] != 1: break - + # Get the dimensions for the first loop var first_dims = 1 var start_position = 0 @@ -450,31 +547,46 @@ struct SLICE: start_position += 1 var middle_dims = res_shape.num_elements() // last_dims // first_dims - + @parameter fn p_slice(i: Int): Self.recursive_iters_slice[ res_shape, original_shape, steps, starts, ends, backward_op ]( - res, t1, last_dims, start_position, res_shape.rank() - 1 - positions_to_skip, - i * middle_dims * last_dims, i * strides[start_position - 1] + res, + t1, + last_dims, + start_position, + res_shape.rank() - 1 - positions_to_skip, + i * middle_dims * last_dims, + i * strides[start_position - 1], ) parallelize[p_slice](first_dims) - + @staticmethod fn forward[ t1_shape: TensorShape, attributes: AttributeVector, ](inout res: Tensor[dtype], t1: Tensor[dtype]): - alias axes = attributes["axes"].value().to_shape() if attributes["axes"] else Self.default_axes(t1_shape) - alias starts = Self.reorder_positions[0](attributes["starts"].value().to_shape(), axes, t1_shape) - alias ends = Self.reorder_positions[1](attributes["ends"].value().to_shape(), axes, t1_shape) - alias steps = Self.reorder_positions[2](attributes["steps"].value().to_shape(), axes, t1_shape) if attributes["steps"] else Self.default_steps(t1_shape) + alias axes = attributes["axes"].value().to_shape() if attributes[ + "axes" + ] else Self.default_axes(t1_shape) + alias starts = Self.reorder_positions[0]( + attributes["starts"].value().to_shape(), axes, t1_shape + ) + alias ends = Self.reorder_positions[1]( + attributes["ends"].value().to_shape(), axes, t1_shape + ) + alias steps = Self.reorder_positions[2]( + attributes["steps"].value().to_shape(), axes, t1_shape + ) if attributes["steps"] else Self.default_steps(t1_shape) alias res_shape = Self.result_shape(t1_shape, attributes) - Self.slice_kernel[res_shape, t1_shape, 
steps, starts, ends, False](res, t1) + Self.slice_kernel[res_shape, t1_shape, steps, starts, ends, False]( + res, t1 + ) @staticmethod fn backward[ @@ -482,13 +594,23 @@ struct SLICE: t1_shape: TensorShape, attributes: AttributeVector = AttributeVector(), ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: - alias axes = attributes["axes"].value().to_shape() if attributes["axes"] else Self.default_axes(t1_shape) - alias starts = Self.reorder_positions[0](attributes["starts"].value().to_shape(), axes, t1_shape) - alias ends = Self.reorder_positions[1](attributes["ends"].value().to_shape(), axes, t1_shape) - alias steps = Self.reorder_positions[2](attributes["steps"].value().to_shape(), axes, t1_shape) if attributes["steps"] else Self.default_steps(t1_shape) + alias axes = attributes["axes"].value().to_shape() if attributes[ + "axes" + ] else Self.default_axes(t1_shape) + alias starts = Self.reorder_positions[0]( + attributes["starts"].value().to_shape(), axes, t1_shape + ) + alias ends = Self.reorder_positions[1]( + attributes["ends"].value().to_shape(), axes, t1_shape + ) + alias steps = Self.reorder_positions[2]( + attributes["steps"].value().to_shape(), axes, t1_shape + ) if attributes["steps"] else Self.default_steps(t1_shape) var res_grad = Tensor[dtype](t1_shape) - - Self.slice_kernel[ug_shape, t1_shape, steps, starts, ends, True](res_grad, ug) - - return res_grad ^ \ No newline at end of file + + Self.slice_kernel[ug_shape, t1_shape, steps, starts, ends, True]( + res_grad, ug + ) + + return res_grad^ diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index 71982706..49b87bc8 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -15,7 +15,16 @@ from .basics import ( TRANSPOSE, FMA, ) -from .mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE, SLICE +from .mlops import ( + THRESHOLD, + SIGMOID, + RELU, + TANH, + CLIP, + SQUEEZE, + UNSQUEEZE, + SLICE, +) from .dynamics import CONCAT, SPLIT from .conv import CONV2D from .pool import MAXPOOL2D @@ -61,6 +70,7 @@ struct OP(Stringable): alias CONCAT = OP(23, "CONCAT", dynamic=True) alias SPLIT = OP(24, "SPLIT", dynamic=True) alias SLICE = OP(25, "SLICE") + alias THRESHOLD = OP(26, "THRESHOLD") var id: UInt8 var name: Bytes[16] @@ -87,10 +97,16 @@ fn static_result_shape( if len(operands) == 1: return static_result_shape(op, operands[0].shape, attributes) elif len(operands) == 2: - return static_result_shape(op, operands[0].shape, operands[1].shape, attributes) + return static_result_shape( + op, operands[0].shape, operands[1].shape, attributes + ) elif len(operands) == 3: return static_result_shape( - op, operands[0].shape, operands[1].shape, operands[2].shape, attributes + op, + operands[0].shape, + operands[1].shape, + operands[2].shape, + attributes, ) else: print("Error: Invalid number of operands") @@ -117,6 +133,8 @@ fn static_result_shape( return FLATTEN.result_shape(t1_shape) elif op == OP.RESHAPE: return RESHAPE.result_shape(t1_shape, attributes) + elif op == OP.THRESHOLD: + return THRESHOLD.result_shape(t1_shape) elif op == OP.SIGMOID: return SIGMOID.result_shape(t1_shape) elif op == OP.RELU: @@ -231,6 +249,8 @@ fn forward_op[ FLATTEN.forward[t1_shape](res, t1) elif op == OP.RESHAPE: RESHAPE.forward[t1_shape](res, t1) + elif op == OP.THRESHOLD: + THRESHOLD.forward[t1_shape, attributes](res, t1) elif op == OP.SIGMOID: SIGMOID.forward[t1_shape](res, t1) elif op == OP.RELU: @@ -254,7 +274,10 @@ fn forward_op[ fn forward_op[ - op: OP, t1_shape: TensorShape, t2_shape: 
TensorShape, attributes: AttributeVector + op: OP, + t1_shape: TensorShape, + t2_shape: TensorShape, + attributes: AttributeVector, ](inout res: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype]): """ Forward pass for binary operators. @@ -283,14 +306,21 @@ fn forward_op[ t2_shape: TensorShape, t3_shape: TensorShape, attributes: AttributeVector, -](inout res: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype], t3: Tensor[dtype]): +]( + inout res: Tensor[dtype], + t1: Tensor[dtype], + t2: Tensor[dtype], + t3: Tensor[dtype], +): """ Forward pass for ternary operators. """ @parameter if op == OP.CONV2D: - CONV2D.forward[t1_shape, t2_shape, t3_shape, attributes](res, t1, t2, t3) + CONV2D.forward[t1_shape, t2_shape, t3_shape, attributes]( + res, t1, t2, t3 + ) elif op == OP.FMA: FMA.forward[t1_shape, t2_shape, t3_shape](res, t1, t2, t3) else: @@ -300,11 +330,7 @@ fn forward_op[ fn forward_op[ op: OP, attributes: AttributeVector, -]( - inputs: List[Symbol], - outputs: List[Symbol], - parameters: Parameters, -): +](inputs: List[Symbol], outputs: List[Symbol], parameters: Parameters,): """ Forward pass for dynamic operators. """ @@ -343,6 +369,8 @@ fn backward_op[ res_grad = FLATTEN.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RESHAPE: res_grad = RESHAPE.backward[ug_shape, t1_shape](ug, t1) + elif op == OP.THRESHOLD: + res_grad = THRESHOLD.backward[ug_shape, t1_shape](ug, t1) elif op == OP.SIGMOID: res_grad = SIGMOID.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RELU: @@ -375,7 +403,12 @@ fn backward_op[ t1_shape: TensorShape, t2_shape: TensorShape, attributes: AttributeVector, -](ug: Tensor[dtype], t1: Tensor[dtype], t2: Tensor[dtype], inout grad: Tensor[dtype]): +]( + ug: Tensor[dtype], + t1: Tensor[dtype], + t2: Tensor[dtype], + inout grad: Tensor[dtype], +): """ Backward pass for binary operators. 
""" @@ -383,17 +416,29 @@ fn backward_op[ @parameter if op == OP.ADD: - res_grad = ADD.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = ADD.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.SUB: - res_grad = SUB.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = SUB.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.MUL: - res_grad = MUL.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = MUL.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.DIV: - res_grad = DIV.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = DIV.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.POW: - res_grad = POW.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = POW.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) elif op == OP.DOT: - res_grad = DOT.backward[tensor_id, ug_shape, t1_shape, t2_shape](ug, t1, t2) + res_grad = DOT.backward[tensor_id, ug_shape, t1_shape, t2_shape]( + ug, t1, t2 + ) else: print("[ERROR] Operator not found.") res_grad = Tensor[dtype](-1, -1) @@ -437,9 +482,9 @@ fn backward_op[ tensor_id, ug_shape, t1_shape, t2_shape, t3_shape, attributes ](ug, t1, t2, t3) elif op == OP.FMA: - res_grad = FMA.backward[tensor_id, ug_shape, t1_shape, t2_shape, t3_shape]( - ug, t1, t2, t3 - ) + res_grad = FMA.backward[ + tensor_id, ug_shape, t1_shape, t2_shape, t3_shape + ](ug, t1, t2, t3) else: print("[ERROR] Operator not found.") res_grad = Tensor[dtype](-1, -1) @@ -463,9 +508,13 @@ fn backward_op[ var res_grad: Tensor[dtype] if op == OP.CONCAT: - res_grad = CONCAT.backward[input_id, attributes](inputs, outputs, parameters) + res_grad = CONCAT.backward[input_id, attributes]( + inputs, outputs, parameters + ) elif op == OP.SPLIT: - res_grad = SPLIT.backward[input_id, attributes](inputs, outputs, parameters) + res_grad = SPLIT.backward[input_id, attributes]( + inputs, outputs, parameters + ) else: print("[ERROR] Operator not found.") res_grad = Tensor[dtype](-1, -1) diff --git a/basalt/nn/__init__.mojo b/basalt/nn/__init__.mojo index 99b30a31..3ff64121 100644 --- a/basalt/nn/__init__.mojo +++ b/basalt/nn/__init__.mojo @@ -6,4 +6,4 @@ from .layers.conv import Conv2d from .layers.pool import MaxPool2d from .loss import MSELoss, CrossEntropyLoss -from .activations import Softmax, LogSoftmax, ReLU, Sigmoid, Tanh +from .activations import Softmax, LogSoftmax, ReLU, Sigmoid, Tanh, Threshold diff --git a/basalt/nn/activations.mojo b/basalt/nn/activations.mojo index 2264a541..44ecaa1d 100644 --- a/basalt/nn/activations.mojo +++ b/basalt/nn/activations.mojo @@ -2,7 +2,21 @@ from basalt import Tensor, TensorShape from basalt import Graph, Symbol, OP from basalt.autograd.attributes import Attribute, AttributeVector + # '''Activation functions.''' +fn Threshold( + inout g: Graph, + input: Symbol, + threshold: Scalar[dtype], + value: Scalar[dtype], +) -> Symbol: + return g.op( + OP.THRESHOLD, + input, + attributes=AttributeVector( + Attribute("threshold", threshold), Attribute("value", value) + ), + ) fn ReLU(inout g: Graph, input: Symbol) -> Symbol: diff --git a/tests/mojo/test_activations.mojo b/tests/mojo/test_activations.mojo index 200215d9..bb6e92fe 100644 --- a/tests/mojo/test_activations.mojo +++ b/tests/mojo/test_activations.mojo @@ -10,6 +10,7 @@ from basalt.nn import ( ReLU, Sigmoid, Tanh, + Threshold, ) from basalt.autograd import Graph, Symbol from 
basalt.utils.tensorutils import fill @@ -19,6 +20,12 @@ from tests import assert_tensors_equal alias Activation = fn (inout g: Graph, input: Symbol) -> Symbol alias AxisActivation = fn (inout g: Graph, input: Symbol, axis: Int) -> Symbol +alias ThresholdActivation = fn ( + inout g: Graph, + input: Symbol, + threshold: Scalar[dtype], + value: Scalar[dtype], +) -> Symbol fn create_graph[ @@ -30,7 +37,7 @@ fn create_graph[ var x = g.input(shape) var activation = func(g, x, axis) g.out(activation) - return g ^ + return g^ fn create_graph[shape: TensorShape, func: Activation]() -> Graph: @@ -38,7 +45,36 @@ fn create_graph[shape: TensorShape, func: Activation]() -> Graph: var x = g.input(shape) var activation = func(g, x) g.out(activation) - return g ^ + return g^ + + +fn create_graph[ + shape: TensorShape, + func: ThresholdActivation, + threshold: Scalar[dtype], + value: Scalar[dtype], +]() -> Graph: + var g = Graph() + var x = g.input(shape) + var activation = func(g, x, threshold, value) + g.out(activation) + return g^ + + +fn test_graph[ + shape: TensorShape, + func: ThresholdActivation, + nodes: Int, + threshold: Scalar[dtype], + value: Scalar[dtype], +](input: Tensor[dtype], expected: Tensor[dtype]) raises: + alias graph = create_graph[shape, func, threshold, value]() + + var model = Model[graph](inference_only=True) + var res = model.inference(input)[0] + + assert_tensors_equal["almost"](res, expected) + assert_equal(len(graph.nodes), nodes) fn test_graph[ @@ -56,6 +92,7 @@ fn test_graph[ assert_equal(len(graph.nodes), nodes) +# TODO: All these overloads feel redundant. Find a way to condense them fn test_graph[ shape: TensorShape, func: Activation, @@ -70,6 +107,26 @@ fn test_graph[ assert_equal(len(graph.nodes), nodes, "Node count failed") +fn test_THRESHOLD() raises: + alias shape = TensorShape(2, 3) + alias nodes = 1 + + alias THRESHOLD = 3 + alias VALUE = 2 + + var input = Tensor[dtype](shape) + + for i in range(6): + input[i] = i + + var expected = Tensor[dtype](shape) + + for i in range(6): + expected[i] = i if i > THRESHOLD else VALUE + + test_graph[shape, Threshold, nodes, THRESHOLD, VALUE](input, expected) + + fn test_SOFTMAX() raises: alias shape = TensorShape(2, 3, 2) alias nodes = 5 From 458f1d40f17df619f45121aa7f8a4a4a2e650a60 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 19:40:03 -0400 Subject: [PATCH 02/16] Reduced cast operations for threshold --- basalt/autograd/ops/mlops.mojo | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 4286420c..8792587c 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -31,9 +31,10 @@ struct THRESHOLD: fn threshold[ type: DType, simd_width: Int ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: - return (x > x.splat(THRESHOLD.cast[type]())).select[type]( - x, VALUE.cast[type]() - ) # Feels like using AttributeVector made this unnecessarily complicated + alias casted_threshold = THRESHOLD.cast[type]() + alias casted_value = VALUE.cast[type]() + + return (x > x.splat(casted_threshold)).select[type](x, casted_value) elwise_transform[threshold](res, t1) From 3ec559f7d728854515c4d4d35e3b817368cd55bc Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 19:42:46 -0400 Subject: [PATCH 03/16] Fixed naming convention internally in threshold --- basalt/autograd/ops/mlops.mojo 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 8792587c..9be839ec 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -54,14 +54,14 @@ struct THRESHOLD: ]() @always_inline - fn threshold[ + fn threshold_bw[ type: DType, simd_width: Int ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: return (x > x.splat(THRESHOLD.cast[type]())).select[type](1, 0) var res_grad = Tensor[dtype](t1_shape) - elwise_transform[threshold](res_grad, t1) + elwise_transform[threshold_bw](res_grad, t1) return res_grad^ From 5d97da5bc26882798f3fe9e4df9c7810e8297a1e Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 19:47:03 -0400 Subject: [PATCH 04/16] Fixed bug with backward for threshold --- basalt/autograd/ops/ops.mojo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index 49b87bc8..48364b2f 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -370,7 +370,7 @@ fn backward_op[ elif op == OP.RESHAPE: res_grad = RESHAPE.backward[ug_shape, t1_shape](ug, t1) elif op == OP.THRESHOLD: - res_grad = THRESHOLD.backward[ug_shape, t1_shape](ug, t1) + res_grad = THRESHOLD.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.SIGMOID: res_grad = SIGMOID.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RELU: From dcacce1c63e231426646343af304383c6370771a Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 20:18:26 -0400 Subject: [PATCH 05/16] Implemented Hardtanh --- basalt/autograd/ops/ops.mojo | 8 ++++++++ basalt/nn/__init__.mojo | 10 +++++++++- basalt/nn/activations.mojo | 15 +++++++++++++++ tests/mojo/test_activations.mojo | 31 +++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index 48364b2f..c0d0db1c 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -20,6 +20,7 @@ from .mlops import ( SIGMOID, RELU, TANH, + HARDTANH, CLIP, SQUEEZE, UNSQUEEZE, @@ -71,6 +72,7 @@ struct OP(Stringable): alias SPLIT = OP(24, "SPLIT", dynamic=True) alias SLICE = OP(25, "SLICE") alias THRESHOLD = OP(26, "THRESHOLD") + alias HARDTANH = OP(27, "HARDTANH") var id: UInt8 var name: Bytes[16] @@ -141,6 +143,8 @@ fn static_result_shape( return RELU.result_shape(t1_shape) elif op == OP.TANH: return TANH.result_shape(t1_shape) + elif op == OP.HARDTANH: + return HARDTANH.result_shape(t1_shape) elif op == OP.TRANSPOSE: return TRANSPOSE.result_shape(t1_shape, attributes) elif op == OP.MAXPOOL2D: @@ -257,6 +261,8 @@ fn forward_op[ RELU.forward[t1_shape](res, t1) elif op == OP.TANH: TANH.forward[t1_shape](res, t1) + elif op == OP.HARDTANH: + HARDTANH.forward[t1_shape, attributes](res, t1) elif op == OP.TRANSPOSE: TRANSPOSE.forward[t1_shape, attributes](res, t1) elif op == OP.MAXPOOL2D: @@ -377,6 +383,8 @@ fn backward_op[ res_grad = RELU.backward[ug_shape, t1_shape](ug, t1) elif op == OP.TANH: res_grad = TANH.backward[ug_shape, t1_shape](ug, t1) + elif op == OP.HARDTANH: + res_grad = HARDTANH.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.TRANSPOSE: res_grad = TRANSPOSE.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.MAXPOOL2D: diff --git a/basalt/nn/__init__.mojo b/basalt/nn/__init__.mojo index 
3ff64121..855ac8c8 100644 --- a/basalt/nn/__init__.mojo +++ b/basalt/nn/__init__.mojo @@ -6,4 +6,12 @@ from .layers.conv import Conv2d from .layers.pool import MaxPool2d from .loss import MSELoss, CrossEntropyLoss -from .activations import Softmax, LogSoftmax, ReLU, Sigmoid, Tanh, Threshold +from .activations import ( + Softmax, + LogSoftmax, + ReLU, + Sigmoid, + Tanh, + Hardtanh, + Threshold, +) diff --git a/basalt/nn/activations.mojo b/basalt/nn/activations.mojo index 44ecaa1d..b0421212 100644 --- a/basalt/nn/activations.mojo +++ b/basalt/nn/activations.mojo @@ -31,6 +31,21 @@ fn Tanh(inout g: Graph, input: Symbol) -> Symbol: return g.op(OP.TANH, input) +fn Hardtanh( + inout g: Graph, + input: Symbol, + min_val: Scalar[dtype], + max_val: Scalar[dtype], +) -> Symbol: + return g.op( + OP.HARDTANH, + input, + attributes=AttributeVector( + Attribute("min_val", min_val), Attribute("max_val", max_val) + ), + ) + + fn Softmax(inout g: Graph, input: Symbol, axis: Int) -> Symbol: # softmax: exp(x_i) / sum(exp(x_j)) # stable softmax: exp(x_i - max(x_j)) / sum(exp(x_j - max(x_j))) diff --git a/tests/mojo/test_activations.mojo b/tests/mojo/test_activations.mojo index bb6e92fe..88330b16 100644 --- a/tests/mojo/test_activations.mojo +++ b/tests/mojo/test_activations.mojo @@ -10,6 +10,7 @@ from basalt.nn import ( ReLU, Sigmoid, Tanh, + Hardtanh, Threshold, ) from basalt.autograd import Graph, Symbol @@ -208,13 +209,43 @@ fn test_TANH() raises: test_graph[shape, Tanh, nodes](input, expected) +fn test_HARDTANH() raises: + alias shape = TensorShape(3, 3) + alias nodes = 1 + + alias MIN_VAL = -2 + alias MAX_VAL = 2 + + var input = Tensor[dtype](shape) + + for i in range(9): + input[i] = i - 4 + + var expected = Tensor[dtype](shape) + + for j in range(0, 9): + var i = j - 4 + if i < MIN_VAL: + expected[j] = MIN_VAL + + elif i > MAX_VAL: + expected[j] = MAX_VAL + + else: + expected[j] = i + + test_graph[shape, Hardtanh, nodes, MIN_VAL, MAX_VAL](input, expected) + + fn main(): try: + test_THRESHOLD() test_SOFTMAX() test_LOGSOFTMAX() test_RELU() test_SIGMOID() test_TANH() + test_HARDTANH() except e: print("[ERROR] Error in activations") print(e) From 75678224d302e9fe4ed2c098e52ed5a086ae8ea2 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 16 May 2024 20:18:53 -0400 Subject: [PATCH 06/16] Rest of Hardtanh implementation --- basalt/autograd/ops/mlops.mojo | 66 ++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 9be839ec..645f3c2c 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -208,6 +208,72 @@ struct TANH: return res_grad^ +struct HARDTANH: + @staticmethod + fn result_shape(t1_shape: TensorShape) -> TensorShape: + return t1_shape + + @staticmethod + fn forward[ + t1_shape: TensorShape, + attributes: AttributeVector, + ](inout res: Tensor[dtype], t1: Tensor[dtype]): + """Forward pass for hard tanh.""" + + alias MIN_VAL: Scalar[dtype] = attributes["min_val"].value().to_scalar[ + dtype + ]() + + alias MAX_VAL: Scalar[dtype] = attributes["max_val"].value().to_scalar[ + dtype + ]() + + @always_inline + fn hardtanh[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + alias casted_min = MIN_VAL.cast[type]() + alias casted_max = MAX_VAL.cast[type]() + + var x_or_min = (x > x.splat(casted_min)).select[type](x, casted_min) + + return (x_or_min < 
x_or_min.splat(casted_max)).select[type]( + x_or_min, casted_max + ) + + elwise_transform[hardtanh](res, t1) + + @staticmethod + fn backward[ + ug_shape: TensorShape, + t1_shape: TensorShape, + attributes: AttributeVector, + ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: + """Backward pass for hard tanh.""" + alias MIN_VAL: Scalar[dtype] = attributes["min_val"].value().to_scalar[ + dtype + ]() + + alias MAX_VAL: Scalar[dtype] = attributes["max_val"].value().to_scalar[ + dtype + ]() + + @always_inline + fn hardtanh_bw[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + return ( + x > x.splat(MIN_VAL.cast[type]()) + and x < x.splat(MAX_VAL.cast[type]()) + ).select[type](1, 0) + + var res_grad = Tensor[dtype](t1_shape) + + elwise_transform[hardtanh_bw](res_grad, t1) + + return res_grad^ + + struct CLIP: @staticmethod fn result_shape(t_shape: TensorShape) -> TensorShape: From 14578fa7ec67f3ba4a5545657fec80f338d5c0df Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Wed, 22 May 2024 08:31:26 -0400 Subject: [PATCH 07/16] Implemented leaky_relu --- basalt/autograd/ops/mlops.mojo | 55 ++++++++++++++++++++++++++++++++++ basalt/autograd/ops/ops.mojo | 8 +++++ basalt/nn/activations.mojo | 10 +++++++ 3 files changed, 73 insertions(+) diff --git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index 645f3c2c..defe508d 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -162,6 +162,61 @@ struct RELU: return res_grad^ +struct LEAKYRELU: + @staticmethod + fn result_shape(t1_shape: TensorShape) -> TensorShape: + return t1_shape + + @staticmethod + fn forward[ + t1_shape: TensorShape, + attributes: AttributeVector, + ](inout res: Tensor[dtype], t1: Tensor[dtype]): + """Forward operation of leaky_relu.""" + + fn leaky_relu[ + type: DType, + simd_width: Int, + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + var negative_slope = attributes["negative_slope"].value().to_scalar[ + type + ]() + return (x > 0).select(x, x * negative_slope) + + elwise_transform[leaky_relu](res, t1) + + @staticmethod + fn backward[ + ug_shape: TensorShape, + t1_shape: TensorShape, + attributes: AttributeVector, + ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: + """Backward operation of leaky_relu.""" + + @always_inline + fn leaky_relu_bw[ + type: DType, simd_width: Int + ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: + var negative_slope = attributes["negative_slope"].value().to_scalar[ + type + ]() + + return (x > 0).select[type](1, negative_slope) + + var res_grad = Tensor[dtype](ug_shape) + + @parameter + fn vec_leaky_relu_bw[nelts: Int](idx: Int): + res_grad.store[nelts]( + idx, + leaky_relu_bw(t1.load[nelts](idx)) * ug.load[nelts](idx), + ) + + vectorize[vec_leaky_relu_bw, nelts](ug_shape.num_elements()) + + return res_grad^ + + struct TANH: @staticmethod fn result_shape(t1_shape: TensorShape) -> TensorShape: diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index c0d0db1c..53ace786 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -19,6 +19,7 @@ from .mlops import ( THRESHOLD, SIGMOID, RELU, + LEAKYRELU, TANH, HARDTANH, CLIP, @@ -73,6 +74,7 @@ struct OP(Stringable): alias SLICE = OP(25, "SLICE") alias THRESHOLD = OP(26, "THRESHOLD") alias HARDTANH = OP(27, "HARDTANH") + alias LEAKYRELU = OP(28, "LEAKYRELU") var id: UInt8 var name: Bytes[16] @@ -141,6 +143,8 @@ fn static_result_shape( 
return SIGMOID.result_shape(t1_shape) elif op == OP.RELU: return RELU.result_shape(t1_shape) + elif op == OP.LEAKYRELU: + return LEAKYRELU.result_shape(t1_shape) elif op == OP.TANH: return TANH.result_shape(t1_shape) elif op == OP.HARDTANH: @@ -259,6 +263,8 @@ fn forward_op[ SIGMOID.forward[t1_shape](res, t1) elif op == OP.RELU: RELU.forward[t1_shape](res, t1) + elif op == OP.LEAKYRELU: + LEAKYRELU.forward[t1_shape, attributes](res, t1) elif op == OP.TANH: TANH.forward[t1_shape](res, t1) elif op == OP.HARDTANH: @@ -381,6 +387,8 @@ fn backward_op[ res_grad = SIGMOID.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RELU: res_grad = RELU.backward[ug_shape, t1_shape](ug, t1) + elif op == OP.LEAKYRELU: + res_grad = LEAKYRELU.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.TANH: res_grad = TANH.backward[ug_shape, t1_shape](ug, t1) elif op == OP.HARDTANH: diff --git a/basalt/nn/activations.mojo b/basalt/nn/activations.mojo index b0421212..61ef51d7 100644 --- a/basalt/nn/activations.mojo +++ b/basalt/nn/activations.mojo @@ -23,6 +23,16 @@ fn ReLU(inout g: Graph, input: Symbol) -> Symbol: return g.op(OP.RELU, input) +fn LeakyReLU( + inout g: Graph, input: Symbol, negative_slope: Scalar[dtype] +) -> Symbol: + return g.op( + OP.LEAKYRELU, + input, + attributes=AttributeVector(Attribute("negative_slope", negative_slope)), + ) + + fn Sigmoid(inout g: Graph, input: Symbol) -> Symbol: return g.op(OP.SIGMOID, input) From 362ef21c86b02078104c207d5e02e0aee8fbe295 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 07:41:12 -0400 Subject: [PATCH 08/16] Added leaky_relu tests to test_activations.mojo Next up: test_mlops and test_mlops_torch --- tests/mojo/test_activations.mojo | 50 ++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/mojo/test_activations.mojo b/tests/mojo/test_activations.mojo index 88330b16..eaee7bef 100644 --- a/tests/mojo/test_activations.mojo +++ b/tests/mojo/test_activations.mojo @@ -8,6 +8,7 @@ from basalt.nn import ( Softmax, LogSoftmax, ReLU, + LeakyReLU, Sigmoid, Tanh, Hardtanh, @@ -21,6 +22,9 @@ from tests import assert_tensors_equal alias Activation = fn (inout g: Graph, input: Symbol) -> Symbol alias AxisActivation = fn (inout g: Graph, input: Symbol, axis: Int) -> Symbol +alias LeakyReLUActivation = fn ( + inout g: Graph, input: Symbol, negative_slope: Scalar[dtype] +) -> Symbol alias ThresholdActivation = fn ( inout g: Graph, input: Symbol, @@ -41,6 +45,18 @@ fn create_graph[ return g^ +fn create_graph[ + shape: TensorShape, + func: LeakyReLUActivation, + negative_slope: Scalar[dtype], +]() -> Graph: + var g = Graph() + var x = g.input(shape) + var activation = func(g, x, negative_slope) + g.out(activation) + return g^ + + fn create_graph[shape: TensorShape, func: Activation]() -> Graph: var g = Graph() var x = g.input(shape) @@ -93,6 +109,21 @@ fn test_graph[ assert_equal(len(graph.nodes), nodes) +fn test_graph[ + shape: TensorShape, + func: LeakyReLUActivation, + nodes: Int, + negative_slope: Scalar[dtype], +](input: Tensor[dtype], expected: Tensor[dtype]) raises: + alias graph = create_graph[shape, func, negative_slope]() + + var model = Model[graph](inference_only=True) + var res = model.inference(input)[0] + + assert_tensors_equal["almost"](res, expected) + assert_equal(len(graph.nodes), nodes) + + # TODO: All these overloads feel redundant. 
Find a way to condense them fn test_graph[ shape: TensorShape, @@ -183,6 +214,25 @@ fn test_RELU() raises: test_graph[shape, ReLU, nodes](input, expected) +fn test_LEAKYRELU() raises: + alias negative_slope = 0.1 + + alias shape = TensorShape(2, 3) + alias nodes = 1 + + var input = Tensor[dtype](shape) + + for i in range(6): + input[i] = i - 3 + + var expected = Tensor[dtype](shape) + + for i in range(6): + expected[i] = i - 3 if i - 3 > 0 else negative_slope * (i - 3) + + test_graph[shape, LeakyReLU, nodes, negative_slope](input, expected) + + fn test_SIGMOID() raises: alias shape = TensorShape(2, 3) alias nodes = 1 From b24dd09dc5a19ce4cdee2a1abc0aa7f822fdb541 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 07:42:32 -0400 Subject: [PATCH 09/16] made LeakyReLU importable through basalt.nn --- basalt/nn/__init__.mojo | 1 + 1 file changed, 1 insertion(+) diff --git a/basalt/nn/__init__.mojo b/basalt/nn/__init__.mojo index 855ac8c8..47d56f33 100644 --- a/basalt/nn/__init__.mojo +++ b/basalt/nn/__init__.mojo @@ -10,6 +10,7 @@ from .activations import ( Softmax, LogSoftmax, ReLU, + LeakyReLU, Sigmoid, Tanh, Hardtanh, From 7cece9f29da26ca506cce8a99fc15282922ddeec Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 08:48:00 -0400 Subject: [PATCH 10/16] Implemented threshold test in test_mlops.mojo --- tests/mojo/test_mlops.mojo | 52 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 2ba723e6..127ce0cf 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -1,11 +1,59 @@ from basalt import dtype, nelts from basalt.autograd import OP from basalt.autograd.attributes import AttributeVector, Attribute -from basalt.autograd.ops.mlops import SIGMOID, RELU, TANH, CLIP, SQUEEZE, UNSQUEEZE +from basalt.autograd.ops.mlops import ( + SIGMOID, + RELU, + THRESHOLD, + LEAKYRELU, + TANH, + HARDTANH, + CLIP, + SQUEEZE, + UNSQUEEZE, +) from basalt.nn import Tensor, TensorShape from basalt.utils.tensorutils import fill -from tests import assert_tensors_equal, test_unary_op, test_unary_op_backward, to_numpy +from tests import ( + assert_tensors_equal, + test_unary_op, + test_unary_op_backward, + to_numpy, +) + + +fn test_THRESHOLD() raises: + alias t1_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + fill(t1, 4.0) + + var expected = Tensor[dtype](2, 3) + fill(expected, 4.0) + + test_unary_op[ + OP.THRESHOLD, + t1_shape, + AttributeVector(Attribute("threshold", 3), Attribute("value", 2)), + ](t1, expected) + + +fn test_backward_THRESHOLD() raises: + alias t1_shape = TensorShape(2, 3) + alias ug_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + var ug: Tensor[dtype] = Tensor[dtype](ug_shape) + fill(ug, 2.0) + + var expected_grad = Tensor[dtype](2, 3) + fill(expected_grad, 0) + + test_unary_op_backward[ + OP.THRESHOLD, + t1_shape, + ug_shape, + AttributeVector(Attribute("threshold", 3), Attribute("value", 2)), + ](t1, ug, expected_grad) fn test_SIGMOID() raises: From 20919eac9f1a4dba0702eafaa64cae6284af2198 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 08:48:41 -0400 Subject: [PATCH 11/16] Implemented LeakyReLU tests in test_mlops --- tests/mojo/test_mlops.mojo | 47 
++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 127ce0cf..584e00e3 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -119,6 +119,53 @@ fn test_backward_RELU() raises: test_unary_op_backward[OP.RELU, t1_shape, ug_shape](t1, ug, expected_grad) +fn test_LEAKYRELU() raises: + alias t1_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + # TODO: When tensors can do slices, this could be changed to two fill functions. + for i in range(3): + t1[i] = 3 + for i in range(3, 6): + t1[i] = -3 + + var expected = Tensor[dtype](2, 3) + for i in range(3): + expected[i] = 3 + for i in range(3, 6): + expected[i] = -0.3 + + test_unary_op[ + OP.LEAKYRELU, + t1_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, expected) + + +fn test_backward_LEAKYRELU() raises: + alias t1_shape = TensorShape(2, 3) + alias ug_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + var ug: Tensor[dtype] = Tensor[dtype](ug_shape) + for i in range(3): + t1[i] = 3 + for i in range(3, 6): + t1[i] = -3 + fill(ug, 5.0) + + var expected_grad = Tensor[dtype](2, 3) + for i in range(3): + expected_grad[i] = 1 * 5.0 + for i in range(3, 6): + expected_grad[i] = 0.1 * 5.0 + + test_unary_op_backward[ + OP.LEAKYRELU, + t1_shape, + ug_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, ug, expected_grad) + + fn test_TANH() raises: alias t1_shape = TensorShape(2, 3) var t1: Tensor[dtype] = Tensor[dtype](t1_shape) From c92d75d0dd191f3c6094420722fed87bb7d8a812 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 08:49:03 -0400 Subject: [PATCH 12/16] Implemented HardTanH tests in test_mlops --- tests/mojo/test_mlops.mojo | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 584e00e3..3ceeabc1 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -189,6 +189,38 @@ fn test_backward_TANH() raises: test_unary_op_backward[OP.TANH, t1_shape, ug_shape](t1, ug, expected_grad) +fn test_HARDTANH() raises: + alias t1_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + + var expected = Tensor[dtype](2, 3) + fill(expected, 0.0) + + test_unary_op[ + OP.HARDTANH, + t1_shape, + AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)), + ](t1, expected) + + +fn test_backward_HARDTANH() raises: + alias t1_shape = TensorShape(2, 3) + alias ug_shape = TensorShape(2, 3) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + var ug: Tensor[dtype] = Tensor[dtype](ug_shape) + fill(ug, 5.0) + + var expected_grad = Tensor[dtype](2, 3) + fill(expected_grad, 0) # 5 > 3, so slope is 0. 
+ + test_unary_op_backward[ + OP.HARDTANH, + t1_shape, + ug_shape, + AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)), + ](t1, ug, expected_grad) + + fn test_CLIP() raises: alias t1_shape = TensorShape(2, 3) var t1: Tensor[dtype] = Tensor[dtype](t1_shape) From 3442d783aefbf001fb5d09b5d867c01472479db2 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Thu, 23 May 2024 08:49:43 -0400 Subject: [PATCH 13/16] Autogenerated formatting changes to test_mlops --- tests/mojo/test_mlops.mojo | 300 ++++++++++++++++++++++++++----------- 1 file changed, 213 insertions(+), 87 deletions(-) diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index 3ceeabc1..fe3ddb5d 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -78,7 +78,9 @@ fn test_backward_SIGMOID() raises: expected_grad, 5.0 * 0.25 ) # 0.25 = d(sigmoid(0))/dx = sigmoid(0) * (1 - sigmoid(0)) - test_unary_op_backward[OP.SIGMOID, t1_shape, ug_shape](t1, ug, expected_grad) + test_unary_op_backward[OP.SIGMOID, t1_shape, ug_shape]( + t1, ug, expected_grad + ) fn test_RELU() raises: @@ -237,7 +239,9 @@ fn test_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_min[i] = val if (val > -1.1) else -1.1 - test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr)](t1, expected_min) + test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr)]( + t1, expected_min + ) # Clip with max alias max_attr = Attribute("max", 1.1) @@ -245,7 +249,9 @@ fn test_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_max[i] = val if (val < 1.1) else 1.1 - test_unary_op[OP.CLIP, t1_shape, AttributeVector(max_attr)](t1, expected_max) + test_unary_op[OP.CLIP, t1_shape, AttributeVector(max_attr)]( + t1, expected_max + ) # Clip with min and max var expected = Tensor[dtype](2, 3) @@ -257,7 +263,9 @@ fn test_CLIP() raises: expected[i] = 1.1 else: expected[i] = val - test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr, max_attr)](t1, expected) + test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr, max_attr)]( + t1, expected + ) fn test_backward_CLIP() raises: @@ -279,7 +287,9 @@ fn test_backward_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_min[i] = 5.0 if (val > -1.1) else 0.0 - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, min_attr](t1, ug, expected_min) + test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, min_attr]( + t1, ug, expected_min + ) # Clip with max alias max_attr = AttributeVector(Attribute("max", 1.1)) @@ -287,7 +297,9 @@ fn test_backward_CLIP() raises: for i in range(6): var val = Scalar[dtype](i - 3) expected_max[i] = 5.0 if (val < 1.1) else 0.0 - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, max_attr](t1, ug, expected_max) + test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, max_attr]( + t1, ug, expected_max + ) # Clip with min and max alias attrs = AttributeVector(Attribute("min", -1.1), Attribute("max", 1.1)) @@ -328,7 +340,9 @@ fn test_SQUEEZE() raises: expected = Tensor[dtype](1, 2, 3) fill(expected, 5.0) test_unary_op[ - OP.SQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(2, 4))) + OP.SQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(2, 4))), ](t1, expected) @@ -343,7 +357,9 @@ fn test_backward_SQUEEZE() raises: var expected_grad = Tensor[dtype](2, 1, 3, 1) fill(expected_grad, 5.0) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape](t1, ug, expected_grad) + test_unary_op_backward[OP.SQUEEZE, t1_shape, 
ug_shape]( + t1, ug, expected_grad + ) fn test_UNSQUEEZE() raises: @@ -355,26 +371,34 @@ fn test_UNSQUEEZE() raises: var expected = Tensor[dtype](2, 1, 3, 1) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(1, 3))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(1, 3))), ](t1, expected) expected = Tensor[dtype](2, 1, 3) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(1))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(1))), ](t1, expected) expected = Tensor[dtype](1, 2, 3) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(-3))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(-3))), ](t1, expected) expected = Tensor[dtype](2, 1, 3, 1) fill(expected, 5.0) test_unary_op[ - OP.UNSQUEEZE, t1_shape, AttributeVector(Attribute("dims", TensorShape(-1, -3))) + OP.UNSQUEEZE, + t1_shape, + AttributeVector(Attribute("dims", TensorShape(-1, -3))), ](t1, expected) @@ -389,7 +413,9 @@ fn test_backward_UNSQUEEZE() raises: var expected_grad = Tensor[dtype](2, 3) fill(expected_grad, 5.0) - test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape](t1, ug, expected_grad) + test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape]( + t1, ug, expected_grad + ) fn test_SLICE() raises: @@ -397,7 +423,7 @@ fn test_SLICE() raises: var t1: Tensor[dtype] = Tensor[dtype](t1_shape) for i in range(t1.num_elements()): t1[i] = i - + alias slice = Slice(1, 3, 1) # dim = 0 @@ -405,15 +431,17 @@ fn test_SLICE() raises: for i in range(2): for j in range(4): for k in range(5): - expected_0[i*4*5 + j*5 + k] = (i + 1) * 4 * 5 + j * 5 + k + expected_0[i * 4 * 5 + j * 5 + k] = (i + 1) * 4 * 5 + j * 5 + k test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t1, expected_0) # dim = 1 @@ -421,15 +449,17 @@ fn test_SLICE() raises: for i in range(3): for j in range(2): for k in range(5): - expected_1[i*2*5 + j*5 + k] = i * 4 * 5 + (j + 1) * 5 + k + expected_1[i * 2 * 5 + j * 5 + k] = i * 4 * 5 + (j + 1) * 5 + k test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, expected_1) # dim = 2 @@ -437,15 +467,17 @@ fn test_SLICE() raises: for i in range(3): for j in range(4): for k in range(2): - expected_2[i*4*2 + j*2 + k] = i * 4 * 5 + j * 5 + (k + 1) - + expected_2[i * 4 * 2 + j * 2 + k] = i * 4 * 5 + j * 5 + (k + 1) + test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t1, expected_2) @@ -462,15 +494,19 @@ fn test_SLICE_step() raises: for i in range(3): for j in range(2): for k in range(2): - expected_0[i*2*2 + j*2 + k] = (i*2 + 1) * 2 * 2 + j * 2 + k + expected_0[i * 2 * 2 + j * 2 + k] = ( + (i * 2 + 1) * 2 * 
2 + j * 2 + k + ) test_unary_op[ - OP.SLICE, t0_shape, AttributeVector( + OP.SLICE, + t0_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t0, expected_0) # dim = 1 @@ -483,15 +519,19 @@ fn test_SLICE_step() raises: for i in range(2): for j in range(3): for k in range(2): - expected_1[i*3*2 + j*2 + k] = i * 10 * 2 + (j*2 + 1) * 2 + k + expected_1[i * 3 * 2 + j * 2 + k] = ( + i * 10 * 2 + (j * 2 + 1) * 2 + k + ) test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, expected_1) # dim = 2 @@ -504,15 +544,19 @@ fn test_SLICE_step() raises: for i in range(2): for j in range(2): for k in range(3): - expected_2[i*2*3 + j*3 + k] = i * 2 * 10 + j * 10 + (k*2 + 1) + expected_2[i * 2 * 3 + j * 3 + k] = ( + i * 2 * 10 + j * 10 + (k * 2 + 1) + ) test_unary_op[ - OP.SLICE, t2_shape, AttributeVector( + OP.SLICE, + t2_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t2, expected_2) @@ -529,15 +573,19 @@ fn test_SLICE_neg() raises: for i in range(3): for j in range(2): for k in range(2): - expected_0[i*2*2 + j*2 + k] = StaticIntTuple[3](6, 4, 2)[i] * 2 * 2 + j * 2 + k + expected_0[i * 2 * 2 + j * 2 + k] = ( + StaticIntTuple[3](6, 4, 2)[i] * 2 * 2 + j * 2 + k + ) test_unary_op[ - OP.SLICE, t0_shape, AttributeVector( + OP.SLICE, + t0_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t0, expected_0) # dim = 1 @@ -550,15 +598,19 @@ fn test_SLICE_neg() raises: for i in range(2): for j in range(3): for k in range(2): - expected_1[i*3*2 + j*2 + k] = i * 10 * 2 + StaticIntTuple[3](6, 4, 2)[j] * 2 + k + expected_1[i * 3 * 2 + j * 2 + k] = ( + i * 10 * 2 + StaticIntTuple[3](6, 4, 2)[j] * 2 + k + ) test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( + OP.SLICE, + t1_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, expected_1) # dim = 2 @@ -571,15 +623,19 @@ fn test_SLICE_neg() raises: for i in range(2): for j in range(2): for k in range(3): - expected_2[i*2*3 + j*3 + k] = i * 2 * 10 + j * 10 + StaticIntTuple[3](6, 4, 2)[k] + expected_2[i * 2 * 3 + j * 3 + k] = ( + i * 2 * 10 + j * 10 + StaticIntTuple[3](6, 4, 2)[k] + ) test_unary_op[ - OP.SLICE, t2_shape, AttributeVector( + OP.SLICE, + t2_shape, + AttributeVector( Attribute("starts", TensorShape(slice.start)), Attribute("ends", TensorShape(slice.end)), Attribute("steps", TensorShape(slice.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t2, expected_2) @@ -597,22 +653,35 @@ fn test_SLICE_multiple_axes() raises: for i in range(3): for j in range(3): for k in range(5): - expected[i*3*5 + j*5 + 
k] = StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + StaticIntTuple[3](3, 6, 9)[j] * 40 + StaticIntTuple[5](5, 7, 9, 11, 13)[k] - + expected[i * 3 * 5 + j * 5 + k] = ( + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + + StaticIntTuple[3](3, 6, 9)[j] * 40 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] + ) + test_unary_op[ - OP.SLICE, t1_shape, AttributeVector( - Attribute("starts", TensorShape(slice_0.start, slice_1.start, slice_2.start)), - Attribute("ends", TensorShape(slice_0.end, slice_1.end, slice_2.end)), - Attribute("steps", TensorShape(slice_0.step, slice_1.step, slice_2.step)), + OP.SLICE, + t1_shape, + AttributeVector( + Attribute( + "starts", + TensorShape(slice_0.start, slice_1.start, slice_2.start), + ), + Attribute( + "ends", TensorShape(slice_0.end, slice_1.end, slice_2.end) + ), + Attribute( + "steps", TensorShape(slice_0.step, slice_1.step, slice_2.step) + ), # Attribute("axes", TensorShape(0, 1, 2)) - ) + ), ](t1, expected) alias t2_shape = TensorShape(20, 32, 40, 50) var t2: Tensor[dtype] = Tensor[dtype](t2_shape) for i in range(t2.num_elements()): t2[i] = i - + alias slice_2_1 = Slice(1, 6, 2) alias slice_2_2 = Slice(3, 10, 3) alias slice_2_3 = Slice(5, 15, 2) @@ -624,14 +693,42 @@ fn test_SLICE_multiple_axes() raises: for j in range(3): for k in range(5): for l in range(4): - expected_2[i*3*5*4 + j*5*4 + k*4 + l] = StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 * 50 + StaticIntTuple[3](3, 6, 9)[j] * 40 * 50 + StaticIntTuple[5](5, 7, 9, 11, 13)[k] * 50 + StaticIntTuple[4](7, 11, 15, 19)[l] - + expected_2[i * 3 * 5 * 4 + j * 5 * 4 + k * 4 + l] = ( + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 * 50 + + StaticIntTuple[3](3, 6, 9)[j] * 40 * 50 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] * 50 + + StaticIntTuple[4](7, 11, 15, 19)[l] + ) + test_unary_op[ - OP.SLICE, t2_shape, AttributeVector( - Attribute("starts", TensorShape(slice_2_1.start, slice_2_2.start, slice_2_3.start, slice_2_4.start)), - Attribute("ends", TensorShape(slice_2_1.end, slice_2_2.end, slice_2_3.end, slice_2_4.end)), - Attribute("steps", TensorShape(slice_2_1.step, slice_2_2.step, slice_2_3.step, slice_2_4.step)), - ) + OP.SLICE, + t2_shape, + AttributeVector( + Attribute( + "starts", + TensorShape( + slice_2_1.start, + slice_2_2.start, + slice_2_3.start, + slice_2_4.start, + ), + ), + Attribute( + "ends", + TensorShape( + slice_2_1.end, slice_2_2.end, slice_2_3.end, slice_2_4.end + ), + ), + Attribute( + "steps", + TensorShape( + slice_2_1.step, + slice_2_2.step, + slice_2_3.step, + slice_2_4.step, + ), + ), + ), ](t2, expected_2) @@ -650,15 +747,18 @@ fn test_backward_SLICE() raises: for i in range(2): for j in range(4): for k in range(5): - expected_ug0[(i+1)*4*5 + j*5 + k] = 1.0 + expected_ug0[(i + 1) * 4 * 5 + j * 5 + k] = 1.0 test_unary_op_backward[ - OP.SLICE, t0_shape, ug0_shape, AttributeVector( + OP.SLICE, + t0_shape, + ug0_shape, + AttributeVector( Attribute("starts", TensorShape(slice_0.start)), Attribute("ends", TensorShape(slice_0.end)), Attribute("steps", TensorShape(slice_0.step)), - Attribute("axes", TensorShape(0)) - ) + Attribute("axes", TensorShape(0)), + ), ](t0, ug0, expected_ug0) # dim = 1 (step = 2) @@ -670,20 +770,23 @@ fn test_backward_SLICE() raises: alias ug1_shape = TensorShape(2, 3, 2) var ug1: Tensor[dtype] = Tensor[dtype](ug1_shape) fill(ug1, 1.0) - + var expected_ug1 = Tensor[dtype](t1_shape) for i in range(2): for j in range(3): for k in range(2): - expected_ug1[i*10*2 + (j*2 + 1)*2 + k] = 1.0 + expected_ug1[i * 10 * 2 + (j * 2 + 1) * 2 + k] = 1.0 test_unary_op_backward[ - 
OP.SLICE, t1_shape, ug1_shape, AttributeVector( + OP.SLICE, + t1_shape, + ug1_shape, + AttributeVector( Attribute("starts", TensorShape(slice_1.start)), Attribute("ends", TensorShape(slice_1.end)), Attribute("steps", TensorShape(slice_1.step)), - Attribute("axes", TensorShape(1)) - ) + Attribute("axes", TensorShape(1)), + ), ](t1, ug1, expected_ug1) # dim = 2 (step = -2) @@ -700,15 +803,20 @@ fn test_backward_SLICE() raises: for i in range(2): for j in range(2): for k in range(3): - expected_ug2[i*2*10 + j*10 + StaticIntTuple[3](6, 4, 2)[k]] = 1.0 + expected_ug2[ + i * 2 * 10 + j * 10 + StaticIntTuple[3](6, 4, 2)[k] + ] = 1.0 test_unary_op_backward[ - OP.SLICE, t2_shape, ug2_shape, AttributeVector( + OP.SLICE, + t2_shape, + ug2_shape, + AttributeVector( Attribute("starts", TensorShape(slice_2.start)), Attribute("ends", TensorShape(slice_2.end)), Attribute("steps", TensorShape(slice_2.step)), - Attribute("axes", TensorShape(2)) - ) + Attribute("axes", TensorShape(2)), + ), ](t2, ug2, expected_ug2) @@ -726,8 +834,12 @@ fn test_backward_SLICE_multiple_axes() raises: for i in range(3): for j in range(3): for k in range(5): - expected[i*3*5 + j*5 + k] = StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + StaticIntTuple[3](3, 6, 9)[j] * 40 + StaticIntTuple[5](5, 7, 9, 11, 13)[k] - + expected[i * 3 * 5 + j * 5 + k] = ( + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + + StaticIntTuple[3](3, 6, 9)[j] * 40 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] + ) + alias ug_shape = TensorShape(3, 3, 5) var ug: Tensor[dtype] = Tensor[dtype](ug_shape) fill(ug, 1.0) @@ -736,14 +848,28 @@ fn test_backward_SLICE_multiple_axes() raises: for i in range(3): for j in range(3): for k in range(5): - expected_ug[StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + StaticIntTuple[3](3, 6, 9)[j] * 40 + StaticIntTuple[5](5, 7, 9, 11, 13)[k]] = 1.0 + expected_ug[ + StaticIntTuple[5](1, 3, 5, 7, 9)[i] * 32 * 40 + + StaticIntTuple[3](3, 6, 9)[j] * 40 + + StaticIntTuple[5](5, 7, 9, 11, 13)[k] + ] = 1.0 test_unary_op_backward[ - OP.SLICE, t1_shape, ug_shape, AttributeVector( - Attribute("starts", TensorShape(slice_0.start, slice_1.start, slice_2.start)), - Attribute("ends", TensorShape(slice_0.end, slice_1.end, slice_2.end)), - Attribute("steps", TensorShape(slice_0.step, slice_1.step, slice_2.step)), - ) + OP.SLICE, + t1_shape, + ug_shape, + AttributeVector( + Attribute( + "starts", + TensorShape(slice_0.start, slice_1.start, slice_2.start), + ), + Attribute( + "ends", TensorShape(slice_0.end, slice_1.end, slice_2.end) + ), + Attribute( + "steps", TensorShape(slice_0.step, slice_1.step, slice_2.step) + ), + ), ](t1, ug, expected_ug) From 62ec6a46b7c0f639747e155891723dc7679ab63e Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Fri, 31 May 2024 07:12:06 -0400 Subject: [PATCH 14/16] Added torch compatibility test for leaky_relu --- tests/python/test_mlops_torch.mojo | 207 +++++++++++++++++++++-------- 1 file changed, 154 insertions(+), 53 deletions(-) diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo index 2f4747cb..a5d3b27f 100644 --- a/tests/python/test_mlops_torch.mojo +++ b/tests/python/test_mlops_torch.mojo @@ -47,6 +47,11 @@ fn torch_unary_op( expected = torch.sigmoid(input_1) elif op == OP.RELU: expected = torch.relu(input_1) + elif op == OP.LEAKYRELU: + expected = torch.nn.functional.leaky_relu( + input_1, + attrs.value()["negative_slope"].value().to_scalar[dtype](), + ) elif op == OP.TANH: expected = torch.tanh(input_1) elif op == 
OP.CLIP: @@ -65,7 +70,9 @@ fn torch_unary_op( var dim = attrs["dims"] if dim: - expected = torch.squeeze(input_1, dim=dim.value().to_shape()[0]) + expected = torch.squeeze( + input_1, dim=dim.value().to_shape()[0] + ) else: expected = torch.squeeze(input_1) elif attrs_tuple: @@ -78,7 +85,9 @@ fn torch_unary_op( var dim = attrs["dims"] if dim: - expected = torch.unsqueeze(input_1, dim=dim.value().to_shape()[0]) + expected = torch.unsqueeze( + input_1, dim=dim.value().to_shape()[0] + ) else: expected = torch.unsqueeze(input_1, 0) elif attrs_tuple: @@ -102,11 +111,11 @@ fn torch_unary_op( if step < 0: flip_dims.append(dim) - step = step *- 1 + step = step * -1 end, start = (end + 1) * -1, (start + 1) * -1 indices[dim] = py.slice(start, end, step) - + expected = input_1.flip(flip_dims)[indices] else: print("Error: op not supported (returning the value input_1): ", op) @@ -159,6 +168,31 @@ fn test_RELU() raises: ) +fn test_LEAKYRELU() raises: + alias t1_shape = TensorShape(37, 63, 107) + alias ug_shape = TensorShape(37, 63, 107) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + rand(t1.data(), t1.num_elements()) + + var ug = Tensor[dtype](ug_shape) + rand(ug.data(), ug.num_elements()) + + var expected_and_grad = torch_unary_op( + OP.LEAKYRELU, t1, ug, AttributeVector(Attribute("negative_slope", 0.1)) + ) + test_unary_op[ + OP.LEAKYRELU, + t1_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, expected_and_grad.expected) + test_unary_op_backward[ + OP.LEAKYRELU, + t1_shape, + ug_shape, + AttributeVector(Attribute("negative_slope", 0.1)), + ](t1, ug, expected_and_grad.grad_1) + + fn test_TANH() raises: alias t1_shape = TensorShape(37, 63, 107) alias ug_shape = TensorShape(37, 63, 107) @@ -193,23 +227,27 @@ fn test_CLIP() raises: # Clip with min alias min_attr = Attribute("min", 0.3333) - expected_and_grad = torch_unary_op(OP.CLIP, t1, ug, AttributeVector(min_attr)) + expected_and_grad = torch_unary_op( + OP.CLIP, t1, ug, AttributeVector(min_attr) + ) test_unary_op[OP.CLIP, t1_shape, AttributeVector(min_attr)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, AttributeVector(min_attr)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.CLIP, t1_shape, ug_shape, AttributeVector(min_attr) + ](t1, ug, expected_and_grad.grad_1) # Clip with max alias max_attr = Attribute("max", 0.6666) - expected_and_grad = torch_unary_op(OP.CLIP, t1, ug, AttributeVector(max_attr)) + expected_and_grad = torch_unary_op( + OP.CLIP, t1, ug, AttributeVector(max_attr) + ) test_unary_op[OP.CLIP, t1_shape, AttributeVector(max_attr)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.CLIP, t1_shape, ug_shape, AttributeVector(max_attr)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.CLIP, t1_shape, ug_shape, AttributeVector(max_attr) + ](t1, ug, expected_and_grad.grad_1) # Clip with min and max expected_and_grad = torch_unary_op( @@ -249,9 +287,9 @@ fn test_SQUEEZE() raises: test_unary_op[OP.SQUEEZE, t1_shape, AttributeVector(dim)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape_1, AttributeVector(dim)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.SQUEEZE, t1_shape, ug_shape_1, AttributeVector(dim) + ](t1, ug, expected_and_grad.grad_1) alias ug_shape_2 = TensorShape(20, 28, 1) ug = Tensor[dtype](ug_shape_2) @@ -259,13 +297,15 @@ fn test_SQUEEZE() raises: alias dim_2 = Attribute("dims", TensorShape(1)) - expected_and_grad = torch_unary_op(OP.SQUEEZE, 
t1, ug, AttributeVector(dim_2)) + expected_and_grad = torch_unary_op( + OP.SQUEEZE, t1, ug, AttributeVector(dim_2) + ) test_unary_op[OP.SQUEEZE, t1_shape, AttributeVector(dim_2)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape_2, AttributeVector(dim_2)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.SQUEEZE, t1_shape, ug_shape_2, AttributeVector(dim_2) + ](t1, ug, expected_and_grad.grad_1) # Squeeze with multiple dims ug = Tensor[dtype](ug_shape) @@ -282,9 +322,9 @@ fn test_SQUEEZE() raises: test_unary_op[OP.SQUEEZE, t1_shape, AttributeVector(dims)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.SQUEEZE, t1_shape, ug_shape, AttributeVector(dims)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.SQUEEZE, t1_shape, ug_shape, AttributeVector(dims) + ](t1, ug, expected_and_grad.grad_1) fn test_UNSQUEEZE() raises: @@ -298,13 +338,15 @@ fn test_UNSQUEEZE() raises: alias dim = Attribute("dims", TensorShape(1)) - var expected_and_grad = torch_unary_op(OP.UNSQUEEZE, t1, ug, AttributeVector(dim)) + var expected_and_grad = torch_unary_op( + OP.UNSQUEEZE, t1, ug, AttributeVector(dim) + ) test_unary_op[OP.UNSQUEEZE, t1_shape, AttributeVector(dim)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape, AttributeVector(dim)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.UNSQUEEZE, t1_shape, ug_shape, AttributeVector(dim) + ](t1, ug, expected_and_grad.grad_1) # Unsqueeze with multiple dims alias ug_shape_2 = TensorShape(20, 1, 28, 1) @@ -321,9 +363,9 @@ fn test_UNSQUEEZE() raises: test_unary_op[OP.UNSQUEEZE, t1_shape, AttributeVector(dims)]( t1, expected_and_grad.expected ) - test_unary_op_backward[OP.UNSQUEEZE, t1_shape, ug_shape_2, AttributeVector(dims)]( - t1, ug, expected_and_grad.grad_1 - ) + test_unary_op_backward[ + OP.UNSQUEEZE, t1_shape, ug_shape_2, AttributeVector(dims) + ](t1, ug, expected_and_grad.grad_1) fn test_SLICE() raises: @@ -337,17 +379,23 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_0.start)), Attribute("ends", TensorShape(slice_0.end)), Attribute("steps", TensorShape(slice_0.step)), - Attribute("axes", TensorShape(0)) + Attribute("axes", TensorShape(0)), ) alias ug_shape = TensorShape(65, 322, 317) var ug = Tensor[dtype](ug_shape) rand(ug.data(), ug.num_elements()) - var attrs_tuple_0 = PythonObject((slice_0.start, slice_0.end, slice_0.step, 0)) - var expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0) + var attrs_tuple_0 = PythonObject( + (slice_0.start, slice_0.end, slice_0.step, 0) + ) + var expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0 + ) test_unary_op[OP.SLICE, t1_shape, attrs_0](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape, attrs_0](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape, attrs_0]( + t1, ug, expected_and_grad.grad_1 + ) # dim = 1 alias slice_1 = Slice(10, 311, 5) @@ -355,17 +403,23 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_1.start)), Attribute("ends", TensorShape(slice_1.end)), Attribute("steps", TensorShape(slice_1.step)), - Attribute("axes", TensorShape(1)) + Attribute("axes", TensorShape(1)), ) alias ug_shape_1 = TensorShape(430, 61, 317) ug = Tensor[dtype](ug_shape_1) rand(ug.data(), ug.num_elements()) - var attrs_tuple_1 = PythonObject((slice_1.start, slice_1.end, slice_1.step, 1)) - expected_and_grad = 
torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_1) + var attrs_tuple_1 = PythonObject( + (slice_1.start, slice_1.end, slice_1.step, 1) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_1 + ) test_unary_op[OP.SLICE, t1_shape, attrs_1](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_1, attrs_1](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_1, attrs_1]( + t1, ug, expected_and_grad.grad_1 + ) # dim = 2 alias slice_2 = Slice(293, 33, -7) @@ -373,20 +427,26 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_2.start)), Attribute("ends", TensorShape(slice_2.end)), Attribute("steps", TensorShape(slice_2.step)), - Attribute("axes", TensorShape(2)) + Attribute("axes", TensorShape(2)), ) alias ug_shape_2 = TensorShape(430, 322, 38) ug = Tensor[dtype](ug_shape_2) rand(ug.data(), ug.num_elements()) - var attrs_tuple_2 = PythonObject((slice_2.start, slice_2.end, slice_2.step, 2)) - expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_2) + var attrs_tuple_2 = PythonObject( + (slice_2.start, slice_2.end, slice_2.step, 2) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_2 + ) test_unary_op[OP.SLICE, t1_shape, attrs_2](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_2, attrs_2](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_2, attrs_2]( + t1, ug, expected_and_grad.grad_1 + ) # Multiple dims - + # dim = 0, 1 alias slice_0_1 = Slice(23, 340, 3) alias slice_1_1 = Slice(10, 250, 5) @@ -395,17 +455,32 @@ fn test_SLICE() raises: Attribute("starts", TensorShape(slice_0_1.start, slice_1_1.start)), Attribute("ends", TensorShape(slice_0_1.end, slice_1_1.end)), Attribute("steps", TensorShape(slice_0_1.step, slice_1_1.step)), - Attribute("axes", TensorShape(0, 1)) + Attribute("axes", TensorShape(0, 1)), ) alias ug_shape_0_1 = TensorShape(106, 48, 317) ug = Tensor[dtype](ug_shape_0_1) rand(ug.data(), ug.num_elements()) - var attrs_tuple_0_1 = PythonObject((slice_0_1.start, slice_0_1.end, slice_0_1.step, 0, slice_1_1.start, slice_1_1.end, slice_1_1.step, 1)) - expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_1) + var attrs_tuple_0_1 = PythonObject( + ( + slice_0_1.start, + slice_0_1.end, + slice_0_1.step, + 0, + slice_1_1.start, + slice_1_1.end, + slice_1_1.step, + 1, + ) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_1 + ) test_unary_op[OP.SLICE, t1_shape, attrs_0_1](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_1, attrs_0_1](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_1, attrs_0_1]( + t1, ug, expected_and_grad.grad_1 + ) # dim = 0, 1, 2 alias slice_0_2 = Slice(-412, -5, 3) @@ -413,20 +488,46 @@ fn test_SLICE() raises: alias slice_2_2 = Slice(293, 33, -7) alias attrs_0_2 = AttributeVector( - Attribute("starts", TensorShape(slice_0_2.start, slice_1_2.start, slice_2_2.start)), - Attribute("ends", TensorShape(slice_0_2.end, slice_1_2.end, slice_2_2.end)), - Attribute("steps", TensorShape(slice_0_2.step, slice_1_2.step, slice_2_2.step)), - Attribute("axes", TensorShape(0, 1, 2)) + Attribute( + "starts", + TensorShape(slice_0_2.start, slice_1_2.start, slice_2_2.start), + ), + Attribute( + "ends", TensorShape(slice_0_2.end, slice_1_2.end, slice_2_2.end) + ), + Attribute( + "steps", 
TensorShape(slice_0_2.step, slice_1_2.step, slice_2_2.step) + ), + Attribute("axes", TensorShape(0, 1, 2)), ) alias ug_shape_0_2 = TensorShape(136, 35, 38) ug = Tensor[dtype](ug_shape_0_2) rand(ug.data(), ug.num_elements()) - var attrs_tuple_0_2 = PythonObject((slice_0_2.start, slice_0_2.end, slice_0_2.step, 0, slice_1_2.start, slice_1_2.end, slice_1_2.step, 1, slice_2_2.start, slice_2_2.end, slice_2_2.step, 2)) - expected_and_grad = torch_unary_op(OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_2) + var attrs_tuple_0_2 = PythonObject( + ( + slice_0_2.start, + slice_0_2.end, + slice_0_2.step, + 0, + slice_1_2.start, + slice_1_2.end, + slice_1_2.step, + 1, + slice_2_2.start, + slice_2_2.end, + slice_2_2.step, + 2, + ) + ) + expected_and_grad = torch_unary_op( + OP.SLICE, t1, ug, attrs_tuple=attrs_tuple_0_2 + ) test_unary_op[OP.SLICE, t1_shape, attrs_0_2](t1, expected_and_grad.expected) - test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_2, attrs_0_2](t1, ug, expected_and_grad.grad_1) + test_unary_op_backward[OP.SLICE, t1_shape, ug_shape_0_2, attrs_0_2]( + t1, ug, expected_and_grad.grad_1 + ) fn main(): From 871371f712a9da20f5ad8c0bb610c48224640be4 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Fri, 31 May 2024 10:28:53 -0400 Subject: [PATCH 15/16] Added torch compatibility test for hard tanh --- tests/python/test_mlops_torch.mojo | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo index a5d3b27f..2cdbf043 100644 --- a/tests/python/test_mlops_torch.mojo +++ b/tests/python/test_mlops_torch.mojo @@ -54,6 +54,12 @@ fn torch_unary_op( ) elif op == OP.TANH: expected = torch.tanh(input_1) + elif op == OP.HARDTANH: + expected = torch.nn.functional.hardtanh( + input_1, + min_val=attrs.value()["min_val"].value().to_scalar[dtype](), + max_val=attrs.value()["max_val"].value().to_scalar[dtype](), + ) elif op == OP.CLIP: var min_attr = attrs.value()["min"] var max_attr = attrs.value()["max"] @@ -209,6 +215,29 @@ fn test_TANH() raises: ) +fn test_HARDTANH() raises: + alias t1_shape = TensorShape(37, 63, 107) + alias ug_shape = TensorShape(37, 63, 107) + var t1: Tensor[dtype] = Tensor[dtype](t1_shape) + rand(t1.data(), t1.num_elements()) + + var ug = Tensor[dtype](ug_shape) + rand(ug.data(), ug.num_elements()) + + var expected_and_grad = torch_unary_op(OP.HARDTANH, t1, ug) + test_unary_op[ + OP.HARDTANH, + t1_shape, + AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)), + ](t1, expected_and_grad.expected) + test_unary_op_backward[ + OP.HARDTANH, + t1_shape, + ug_shape, + AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)), + ](t1, ug, expected_and_grad.grad_1) + + fn test_CLIP() raises: alias t1_shape = TensorShape(37, 63, 107) alias ug_shape = TensorShape(37, 63, 107) From 083e2918d7636d83ddb7189677596a7c8706e276 Mon Sep 17 00:00:00 2001 From: Yosef Frost <92753960+FrostyTheSouthernSnowman@users.noreply.github.com> Date: Fri, 7 Jun 2024 06:46:52 -0400 Subject: [PATCH 16/16] Removed broken threshold and hardtanh activation functions --- basalt/autograd/ops/mlops.mojo | 125 ----------------------------- basalt/autograd/ops/ops.mojo | 16 ---- basalt/nn/__init__.mojo | 2 - basalt/nn/activations.mojo | 30 ------- tests/mojo/test_activations.mojo | 87 -------------------- tests/mojo/test_mlops.mojo | 67 ---------------- tests/python/test_mlops_torch.mojo | 31 +------ 7 files changed, 1 insertion(+), 357 deletions(-) diff 
--git a/basalt/autograd/ops/mlops.mojo b/basalt/autograd/ops/mlops.mojo index defe508d..c9220506 100644 --- a/basalt/autograd/ops/mlops.mojo +++ b/basalt/autograd/ops/mlops.mojo @@ -7,65 +7,6 @@ from basalt.utils.tensorutils import elwise_transform from basalt.autograd.attributes import Attribute, AttributeVector -struct THRESHOLD: - @staticmethod - fn result_shape(t1_shape: TensorShape) -> TensorShape: - return t1_shape - - @staticmethod - fn forward[ - t1_shape: TensorShape, - attributes: AttributeVector, - ](inout res: Tensor[dtype], t1: Tensor[dtype]): - """Forward pass for threshold.""" - - alias THRESHOLD: Scalar[dtype] = attributes[ - "threshold" - ].value().to_scalar[dtype]() - - alias VALUE: Scalar[dtype] = attributes["value"].value().to_scalar[ - dtype - ]() - - @always_inline - fn threshold[ - type: DType, simd_width: Int - ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: - alias casted_threshold = THRESHOLD.cast[type]() - alias casted_value = VALUE.cast[type]() - - return (x > x.splat(casted_threshold)).select[type](x, casted_value) - - elwise_transform[threshold](res, t1) - - @staticmethod - fn backward[ - ug_shape: TensorShape, - t1_shape: TensorShape, - attributes: AttributeVector, - ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: - """Backward pass for threshold.""" - alias THRESHOLD: Scalar[dtype] = attributes[ - "threshold" - ].value().to_scalar[dtype]() - - alias VALUE: Scalar[dtype] = attributes["value"].value().to_scalar[ - dtype - ]() - - @always_inline - fn threshold_bw[ - type: DType, simd_width: Int - ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: - return (x > x.splat(THRESHOLD.cast[type]())).select[type](1, 0) - - var res_grad = Tensor[dtype](t1_shape) - - elwise_transform[threshold_bw](res_grad, t1) - - return res_grad^ - - @value struct SIGMOID: @staticmethod @@ -263,72 +204,6 @@ struct TANH: return res_grad^ -struct HARDTANH: - @staticmethod - fn result_shape(t1_shape: TensorShape) -> TensorShape: - return t1_shape - - @staticmethod - fn forward[ - t1_shape: TensorShape, - attributes: AttributeVector, - ](inout res: Tensor[dtype], t1: Tensor[dtype]): - """Forward pass for hard tanh.""" - - alias MIN_VAL: Scalar[dtype] = attributes["min_val"].value().to_scalar[ - dtype - ]() - - alias MAX_VAL: Scalar[dtype] = attributes["max_val"].value().to_scalar[ - dtype - ]() - - @always_inline - fn hardtanh[ - type: DType, simd_width: Int - ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: - alias casted_min = MIN_VAL.cast[type]() - alias casted_max = MAX_VAL.cast[type]() - - var x_or_min = (x > x.splat(casted_min)).select[type](x, casted_min) - - return (x_or_min < x_or_min.splat(casted_max)).select[type]( - x_or_min, casted_max - ) - - elwise_transform[hardtanh](res, t1) - - @staticmethod - fn backward[ - ug_shape: TensorShape, - t1_shape: TensorShape, - attributes: AttributeVector, - ](ug: Tensor[dtype], t1: Tensor[dtype]) -> Tensor[dtype]: - """Backward pass for hard tanh.""" - alias MIN_VAL: Scalar[dtype] = attributes["min_val"].value().to_scalar[ - dtype - ]() - - alias MAX_VAL: Scalar[dtype] = attributes["max_val"].value().to_scalar[ - dtype - ]() - - @always_inline - fn hardtanh_bw[ - type: DType, simd_width: Int - ](x: SIMD[type, simd_width]) -> SIMD[type, simd_width]: - return ( - x > x.splat(MIN_VAL.cast[type]()) - and x < x.splat(MAX_VAL.cast[type]()) - ).select[type](1, 0) - - var res_grad = Tensor[dtype](t1_shape) - - elwise_transform[hardtanh_bw](res_grad, t1) - - return res_grad^ - - struct CLIP: @staticmethod fn 
result_shape(t_shape: TensorShape) -> TensorShape: diff --git a/basalt/autograd/ops/ops.mojo b/basalt/autograd/ops/ops.mojo index 53ace786..24221c5e 100644 --- a/basalt/autograd/ops/ops.mojo +++ b/basalt/autograd/ops/ops.mojo @@ -16,12 +16,10 @@ from .basics import ( FMA, ) from .mlops import ( - THRESHOLD, SIGMOID, RELU, LEAKYRELU, TANH, - HARDTANH, CLIP, SQUEEZE, UNSQUEEZE, @@ -72,8 +70,6 @@ struct OP(Stringable): alias CONCAT = OP(23, "CONCAT", dynamic=True) alias SPLIT = OP(24, "SPLIT", dynamic=True) alias SLICE = OP(25, "SLICE") - alias THRESHOLD = OP(26, "THRESHOLD") - alias HARDTANH = OP(27, "HARDTANH") alias LEAKYRELU = OP(28, "LEAKYRELU") var id: UInt8 @@ -137,8 +133,6 @@ fn static_result_shape( return FLATTEN.result_shape(t1_shape) elif op == OP.RESHAPE: return RESHAPE.result_shape(t1_shape, attributes) - elif op == OP.THRESHOLD: - return THRESHOLD.result_shape(t1_shape) elif op == OP.SIGMOID: return SIGMOID.result_shape(t1_shape) elif op == OP.RELU: @@ -147,8 +141,6 @@ fn static_result_shape( return LEAKYRELU.result_shape(t1_shape) elif op == OP.TANH: return TANH.result_shape(t1_shape) - elif op == OP.HARDTANH: - return HARDTANH.result_shape(t1_shape) elif op == OP.TRANSPOSE: return TRANSPOSE.result_shape(t1_shape, attributes) elif op == OP.MAXPOOL2D: @@ -257,8 +249,6 @@ fn forward_op[ FLATTEN.forward[t1_shape](res, t1) elif op == OP.RESHAPE: RESHAPE.forward[t1_shape](res, t1) - elif op == OP.THRESHOLD: - THRESHOLD.forward[t1_shape, attributes](res, t1) elif op == OP.SIGMOID: SIGMOID.forward[t1_shape](res, t1) elif op == OP.RELU: @@ -267,8 +257,6 @@ fn forward_op[ LEAKYRELU.forward[t1_shape, attributes](res, t1) elif op == OP.TANH: TANH.forward[t1_shape](res, t1) - elif op == OP.HARDTANH: - HARDTANH.forward[t1_shape, attributes](res, t1) elif op == OP.TRANSPOSE: TRANSPOSE.forward[t1_shape, attributes](res, t1) elif op == OP.MAXPOOL2D: @@ -381,8 +369,6 @@ fn backward_op[ res_grad = FLATTEN.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RESHAPE: res_grad = RESHAPE.backward[ug_shape, t1_shape](ug, t1) - elif op == OP.THRESHOLD: - res_grad = THRESHOLD.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.SIGMOID: res_grad = SIGMOID.backward[ug_shape, t1_shape](ug, t1) elif op == OP.RELU: @@ -391,8 +377,6 @@ fn backward_op[ res_grad = LEAKYRELU.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.TANH: res_grad = TANH.backward[ug_shape, t1_shape](ug, t1) - elif op == OP.HARDTANH: - res_grad = HARDTANH.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.TRANSPOSE: res_grad = TRANSPOSE.backward[ug_shape, t1_shape, attributes](ug, t1) elif op == OP.MAXPOOL2D: diff --git a/basalt/nn/__init__.mojo b/basalt/nn/__init__.mojo index 47d56f33..c2a06607 100644 --- a/basalt/nn/__init__.mojo +++ b/basalt/nn/__init__.mojo @@ -13,6 +13,4 @@ from .activations import ( LeakyReLU, Sigmoid, Tanh, - Hardtanh, - Threshold, ) diff --git a/basalt/nn/activations.mojo b/basalt/nn/activations.mojo index 61ef51d7..9a83a0fd 100644 --- a/basalt/nn/activations.mojo +++ b/basalt/nn/activations.mojo @@ -4,21 +4,6 @@ from basalt.autograd.attributes import Attribute, AttributeVector # '''Activation functions.''' -fn Threshold( - inout g: Graph, - input: Symbol, - threshold: Scalar[dtype], - value: Scalar[dtype], -) -> Symbol: - return g.op( - OP.THRESHOLD, - input, - attributes=AttributeVector( - Attribute("threshold", threshold), Attribute("value", value) - ), - ) - - fn ReLU(inout g: Graph, input: Symbol) -> Symbol: return g.op(OP.RELU, input) @@ -41,21 +26,6 @@ fn Tanh(inout g: 
Graph, input: Symbol) -> Symbol: return g.op(OP.TANH, input) -fn Hardtanh( - inout g: Graph, - input: Symbol, - min_val: Scalar[dtype], - max_val: Scalar[dtype], -) -> Symbol: - return g.op( - OP.HARDTANH, - input, - attributes=AttributeVector( - Attribute("min_val", min_val), Attribute("max_val", max_val) - ), - ) - - fn Softmax(inout g: Graph, input: Symbol, axis: Int) -> Symbol: # softmax: exp(x_i) / sum(exp(x_j)) # stable softmax: exp(x_i - max(x_j)) / sum(exp(x_j - max(x_j))) diff --git a/tests/mojo/test_activations.mojo b/tests/mojo/test_activations.mojo index eaee7bef..61a633b8 100644 --- a/tests/mojo/test_activations.mojo +++ b/tests/mojo/test_activations.mojo @@ -11,8 +11,6 @@ from basalt.nn import ( LeakyReLU, Sigmoid, Tanh, - Hardtanh, - Threshold, ) from basalt.autograd import Graph, Symbol from basalt.utils.tensorutils import fill @@ -25,12 +23,6 @@ alias AxisActivation = fn (inout g: Graph, input: Symbol, axis: Int) -> Symbol alias LeakyReLUActivation = fn ( inout g: Graph, input: Symbol, negative_slope: Scalar[dtype] ) -> Symbol -alias ThresholdActivation = fn ( - inout g: Graph, - input: Symbol, - threshold: Scalar[dtype], - value: Scalar[dtype], -) -> Symbol fn create_graph[ @@ -65,35 +57,6 @@ fn create_graph[shape: TensorShape, func: Activation]() -> Graph: return g^ -fn create_graph[ - shape: TensorShape, - func: ThresholdActivation, - threshold: Scalar[dtype], - value: Scalar[dtype], -]() -> Graph: - var g = Graph() - var x = g.input(shape) - var activation = func(g, x, threshold, value) - g.out(activation) - return g^ - - -fn test_graph[ - shape: TensorShape, - func: ThresholdActivation, - nodes: Int, - threshold: Scalar[dtype], - value: Scalar[dtype], -](input: Tensor[dtype], expected: Tensor[dtype]) raises: - alias graph = create_graph[shape, func, threshold, value]() - - var model = Model[graph](inference_only=True) - var res = model.inference(input)[0] - - assert_tensors_equal["almost"](res, expected) - assert_equal(len(graph.nodes), nodes) - - fn test_graph[ shape: TensorShape, func: AxisActivation, @@ -139,26 +102,6 @@ fn test_graph[ assert_equal(len(graph.nodes), nodes, "Node count failed") -fn test_THRESHOLD() raises: - alias shape = TensorShape(2, 3) - alias nodes = 1 - - alias THRESHOLD = 3 - alias VALUE = 2 - - var input = Tensor[dtype](shape) - - for i in range(6): - input[i] = i - - var expected = Tensor[dtype](shape) - - for i in range(6): - expected[i] = i if i > THRESHOLD else VALUE - - test_graph[shape, Threshold, nodes, THRESHOLD, VALUE](input, expected) - - fn test_SOFTMAX() raises: alias shape = TensorShape(2, 3, 2) alias nodes = 5 @@ -259,43 +202,13 @@ fn test_TANH() raises: test_graph[shape, Tanh, nodes](input, expected) -fn test_HARDTANH() raises: - alias shape = TensorShape(3, 3) - alias nodes = 1 - - alias MIN_VAL = -2 - alias MAX_VAL = 2 - - var input = Tensor[dtype](shape) - - for i in range(9): - input[i] = i - 4 - - var expected = Tensor[dtype](shape) - - for j in range(0, 9): - var i = j - 4 - if i < MIN_VAL: - expected[j] = MIN_VAL - - elif i > MAX_VAL: - expected[j] = MAX_VAL - - else: - expected[j] = i - - test_graph[shape, Hardtanh, nodes, MIN_VAL, MAX_VAL](input, expected) - - fn main(): try: - test_THRESHOLD() test_SOFTMAX() test_LOGSOFTMAX() test_RELU() test_SIGMOID() test_TANH() - test_HARDTANH() except e: print("[ERROR] Error in activations") print(e) diff --git a/tests/mojo/test_mlops.mojo b/tests/mojo/test_mlops.mojo index fe3ddb5d..620f00f1 100644 --- a/tests/mojo/test_mlops.mojo +++ b/tests/mojo/test_mlops.mojo @@ -4,10 +4,8 
@@ from basalt.autograd.attributes import AttributeVector, Attribute from basalt.autograd.ops.mlops import ( SIGMOID, RELU, - THRESHOLD, LEAKYRELU, TANH, - HARDTANH, CLIP, SQUEEZE, UNSQUEEZE, @@ -23,39 +21,6 @@ from tests import ( ) -fn test_THRESHOLD() raises: - alias t1_shape = TensorShape(2, 3) - var t1: Tensor[dtype] = Tensor[dtype](t1_shape) - fill(t1, 4.0) - - var expected = Tensor[dtype](2, 3) - fill(expected, 4.0) - - test_unary_op[ - OP.THRESHOLD, - t1_shape, - AttributeVector(Attribute("threshold", 3), Attribute("value", 2)), - ](t1, expected) - - -fn test_backward_THRESHOLD() raises: - alias t1_shape = TensorShape(2, 3) - alias ug_shape = TensorShape(2, 3) - var t1: Tensor[dtype] = Tensor[dtype](t1_shape) - var ug: Tensor[dtype] = Tensor[dtype](ug_shape) - fill(ug, 2.0) - - var expected_grad = Tensor[dtype](2, 3) - fill(expected_grad, 0) - - test_unary_op_backward[ - OP.THRESHOLD, - t1_shape, - ug_shape, - AttributeVector(Attribute("threshold", 3), Attribute("value", 2)), - ](t1, ug, expected_grad) - - fn test_SIGMOID() raises: alias t1_shape = TensorShape(2, 3) var t1: Tensor[dtype] = Tensor[dtype](t1_shape) @@ -191,38 +156,6 @@ fn test_backward_TANH() raises: test_unary_op_backward[OP.TANH, t1_shape, ug_shape](t1, ug, expected_grad) -fn test_HARDTANH() raises: - alias t1_shape = TensorShape(2, 3) - var t1: Tensor[dtype] = Tensor[dtype](t1_shape) - - var expected = Tensor[dtype](2, 3) - fill(expected, 0.0) - - test_unary_op[ - OP.HARDTANH, - t1_shape, - AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)), - ](t1, expected) - - -fn test_backward_HARDTANH() raises: - alias t1_shape = TensorShape(2, 3) - alias ug_shape = TensorShape(2, 3) - var t1: Tensor[dtype] = Tensor[dtype](t1_shape) - var ug: Tensor[dtype] = Tensor[dtype](ug_shape) - fill(ug, 5.0) - - var expected_grad = Tensor[dtype](2, 3) - fill(expected_grad, 0) # 5 > 3, so slope is 0. 
-
-    test_unary_op_backward[
-        OP.HARDTANH,
-        t1_shape,
-        ug_shape,
-        AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)),
-    ](t1, ug, expected_grad)
-
-
 fn test_CLIP() raises:
     alias t1_shape = TensorShape(2, 3)
     var t1: Tensor[dtype] = Tensor[dtype](t1_shape)
diff --git a/tests/python/test_mlops_torch.mojo b/tests/python/test_mlops_torch.mojo
index 2cdbf043..5cba639c 100644
--- a/tests/python/test_mlops_torch.mojo
+++ b/tests/python/test_mlops_torch.mojo
@@ -43,7 +43,7 @@ fn torch_unary_op(
 
     var expected: PythonObject
 
-    if op == OP.SIGMOID:
+    if op == OP.SIGMOID:
         expected = torch.sigmoid(input_1)
     elif op == OP.RELU:
         expected = torch.relu(input_1)
@@ -54,12 +54,6 @@ fn torch_unary_op(
         )
     elif op == OP.TANH:
         expected = torch.tanh(input_1)
-    elif op == OP.HARDTANH:
-        expected = torch.nn.functional.hardtanh(
-            input_1,
-            min_val=attrs.value()["min_val"].value().to_scalar[dtype](),
-            max_val=attrs.value()["max_val"].value().to_scalar[dtype](),
-        )
     elif op == OP.CLIP:
         var min_attr = attrs.value()["min"]
         var max_attr = attrs.value()["max"]
@@ -215,29 +209,6 @@ fn test_TANH() raises:
     )
 
 
-fn test_HARDTANH() raises:
-    alias t1_shape = TensorShape(37, 63, 107)
-    alias ug_shape = TensorShape(37, 63, 107)
-    var t1: Tensor[dtype] = Tensor[dtype](t1_shape)
-    rand(t1.data(), t1.num_elements())
-
-    var ug = Tensor[dtype](ug_shape)
-    rand(ug.data(), ug.num_elements())
-
-    var expected_and_grad = torch_unary_op(OP.HARDTANH, t1, ug)
-    test_unary_op[
-        OP.HARDTANH,
-        t1_shape,
-        AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)),
-    ](t1, expected_and_grad.expected)
-    test_unary_op_backward[
-        OP.HARDTANH,
-        t1_shape,
-        ug_shape,
-        AttributeVector(Attribute("min_val", -3), Attribute("max_val", 3)),
-    ](t1, ug, expected_and_grad.grad_1)
-
-
 fn test_CLIP() raises:
     alias t1_shape = TensorShape(37, 63, 107)
     alias ug_shape = TensorShape(37, 63, 107)