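LocalResponseNormalization: We're spending about half of the time in this function converting to and from double. Do the computation in the float domain by reading op_params.bias into a local float once, before the loops, and using that local in the per-element addition; I don't think there is any risk of numerical instability here.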

tensorflower-gardener · tensorflower-gardener · commit 9065899e9252 · 2020-07-10T05:53:07.000-07:00
PiperOrigin-RevId: 320588654
Change-Id: Ia641f6359b5966aa669de037d355292a25c08bed
diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
@@ -3815,6 +3815,7 @@ inline void LocalResponseNormalization(
   const int double_range = op_params.range * 2;
   Eigen::VectorXf padded_square(data_in.rows() + double_range);
   padded_square.setZero();
+  const float bias = op_params.bias;
   for (int r = 0; r < data_in.cols(); ++r) {
     // Do local response normalization for data_in(:, r)
     // first, compute the square and store them in buffer for repeated use
@@ -3827,7 +3828,7 @@ inline void LocalResponseNormalization(
     }
     for (int i = 0; i < data_in.rows(); ++i) {
       accumulated_scale += padded_square(i + double_range);
-      data_out(i, r) = op_params.bias + accumulated_scale;
+      data_out(i, r) = bias + accumulated_scale;
       accumulated_scale -= padded_square(i);
     }
   }

Original file line number	Diff line number	Diff line change
`@@ -3815,6 +3815,7 @@ inline void LocalResponseNormalization(`
`3815`	`3815`	`const int double_range = op_params.range * 2;`
`3816`	`3816`	`Eigen::VectorXf padded_square(data_in.rows() + double_range);`
`3817`	`3817`	`padded_square.setZero();`
	`3818`	`+ const float bias = op_params.bias;`
`3818`	`3819`	`for (int r = 0; r < data_in.cols(); ++r) {`
`3819`	`3820`	`// Do local response normalization for data_in(:, r)`
`3820`	`3821`	`// first, compute the square and store them in buffer for repeated use`
`@@ -3827,7 +3828,7 @@ inline void LocalResponseNormalization(`
`3827`	`3828`	`}`
`3828`	`3829`	`for (int i = 0; i < data_in.rows(); ++i) {`
`3829`	`3830`	`accumulated_scale += padded_square(i + double_range);`
`3830`		`- data_out(i, r) = op_params.bias + accumulated_scale;`
	`3831`	`+ data_out(i, r) = bias + accumulated_scale;`
`3831`	`3832`	`accumulated_scale -= padded_square(i);`
`3832`	`3833`	`}`
`3833`	`3834`	`}`