From 797312e75893763e235bba8caeea0b6c05bc05aa Mon Sep 17 00:00:00 2001
From: klosax <131523366+klosax@users.noreply.github.com>
Date: Thu, 24 Aug 2023 14:18:57 +0200
Subject: [PATCH 1/3] ggml.c : use double precision for tanh

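---
Note: in ggml_gelu_f32 the argument passed to tanh() is still evaluated in
single precision at this point (SQRT_2_OVER_PI, GELU_COEF_A and x are all
float); only the tanh() call and the outer 0.5*x*(1.0 + ...) product are
carried out in double. Patch 2/3 moves the whole expression to ggml_float.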
 ggml.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ggml.c b/ggml.c
index 8cb5c404f285d..7d40cf815d87b 100644
--- a/ggml.c
+++ b/ggml.c
@@ -3550,7 +3550,7 @@ inline static void ggml_vec_log_f32  (const int n, float * y, const float * x) {
 inline static void ggml_vec_abs_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
 inline static void ggml_vec_sgn_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
 inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
-inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]);  }
+inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanh(x[i]);  }
 inline static void ggml_vec_elu_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
 inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
 
@@ -3559,7 +3559,7 @@ static const float GELU_QUICK_COEF = -1.702f;
 static const float SQRT_2_OVER_PI  = 0.79788456080286535587989211986876f;
 
 inline static float ggml_gelu_f32(float x) {
-    return 0.5f*x*(1.0f + tanhf(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x)));
+    return 0.5*(double)x*(1.0 + tanh(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x)));
 }
 
 inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {

From 84e8da665d3982aede690a6c7244ff4b37ee5d7d Mon Sep 17 00:00:00 2001
From: klosax <131523366+klosax@users.noreply.github.com>
Date: Thu, 24 Aug 2023 15:13:18 +0200
Subject: [PATCH 2/3] ggml.c : use ggml_float for gelu

---
 ggml.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ggml.c b/ggml.c
index 7d40cf815d87b..15206ea364d00 100644
--- a/ggml.c
+++ b/ggml.c
@@ -3554,12 +3554,13 @@ inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) {
 inline static void ggml_vec_elu_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
 inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
 
-static const float GELU_COEF_A     = 0.044715f;
-static const float GELU_QUICK_COEF = -1.702f;
-static const float SQRT_2_OVER_PI  = 0.79788456080286535587989211986876f;
+static const float GELU_QUICK_COEF      = -1.702f;
+static const ggml_float GELU_COEF_A     = 0.044715;
+static const ggml_float SQRT_2_OVER_PI  = 0.79788456080286535587989211986876;
 
 inline static float ggml_gelu_f32(float x) {
-    return 0.5*(double)x*(1.0 + tanh(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x)));
+    const ggml_float xx = (ggml_float) x;
+    return 0.5*xx*(1.0 + tanh(SQRT_2_OVER_PI*xx*(1.0 + GELU_COEF_A*xx*xx)));
 }
 
 inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {

From 252647cf5554506960337cfcc67de355fd982987 Mon Sep 17 00:00:00 2001
From: klosax <131523366+klosax@users.noreply.github.com>
Date: Thu, 24 Aug 2023 23:02:01 +0200
Subject: [PATCH 3/3] ggml.c : undefine GGML_GELU_FP16

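---
Note: GGML_GELU_FP16 is disabled by commenting out its #define (no #undef is
added); GGML_GELU_QUICK_FP16 and GGML_SILU_FP16 stay defined.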
 ggml.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml.c b/ggml.c
index 15206ea364d00..ad1a6b38637c9 100644
--- a/ggml.c
+++ b/ggml.c
@@ -120,7 +120,7 @@ typedef void * thread_ret_t;
 
 /*#define GGML_PERF*/
 #define GGML_DEBUG 0
-#define GGML_GELU_FP16
+//#define GGML_GELU_FP16
 #define GGML_GELU_QUICK_FP16
 #define GGML_SILU_FP16