diff --git a/BUILD b/BUILD
index d5f1063377..c3c58b4eef 100644
--- a/BUILD
+++ b/BUILD
@@ -31,6 +31,7 @@ pyx_library(
     ),
     deps = [
         "//cpp/fury/util:fury_util",
+        "//cpp/fury/python:pyunicode",
     ],
 )
 
@@ -63,6 +64,7 @@ pyx_library(
     deps = [
         "//cpp/fury/util:fury_util",
         "//cpp/fury/type:fury_type",
+        "//cpp/fury/python:pyunicode",
         "@com_google_absl//absl/container:flat_hash_map",
     ],
 )
@@ -83,6 +85,7 @@ pyx_library(
     ),
     deps = [
         "//cpp/fury:fury",
+        "//cpp/fury/python:pyunicode",
         "@local_config_pyarrow//:python_numpy_headers",
         "@local_config_pyarrow//:arrow_python_shared_library"
     ],
diff --git a/cpp/fury/python/BUILD b/cpp/fury/python/BUILD
new file mode 100644
index 0000000000..6b218d0a60
--- /dev/null
+++ b/cpp/fury/python/BUILD
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")
+load("@com_github_grpc_grpc//bazel:cython_library.bzl", "pyx_library")
+
+cc_library(  # str-building helpers consumed by the pyx_library targets
+    name = "pyunicode",
+    srcs = ["pyunicode.cc"],
+    hdrs = ["pyunicode.h"],
+    linkstatic = True,
+    strip_include_prefix = "/cpp",  # header is included as "fury/python/..."
+    visibility = ["//visibility:public"],
+    deps = [
+        "//cpp/fury/util:fury_util",
+        "@local_config_python//:python_headers",
+    ],
+    alwayslink = True,
+)
diff --git a/cpp/fury/python/pyunicode.cc b/cpp/fury/python/pyunicode.cc
new file mode 100644
index 0000000000..3c50e00b98
--- /dev/null
+++ b/cpp/fury/python/pyunicode.cc
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "pyunicode.h"
+#include "fury/util/array_util.h"
+#include "fury/util/logging.h"
+#include "fury/util/string_util.h"
+#include "unicodeobject.h"
+#include <cassert>
+
+namespace fury {
+
+static PyObject *unicode_latin1[256] = {nullptr}; // lazy 1-char str cache
+
+// New reference to the cached 1-char str for `ch`; NULL if creation fails.
+static PyObject *get_latin1_char(unsigned char ch) {
+  PyObject *&slot = unicode_latin1[ch];
+  if (slot == nullptr) {
+    slot = PyUnicode_New(1, ch);
+    if (slot == nullptr)
+      return NULL;
+    PyUnicode_1BYTE_DATA(slot)[0] = ch;
+  }
+  Py_INCREF(slot);
+  return slot;
+}
+
+// Builds a str from `size` (> 0, checked) one-byte code units at `u`;
+// returns a new reference, or NULL when PyUnicode_New fails.
+PyObject *Fury_PyUnicode_FromUCS1(const char *u, Py_ssize_t size) {
+  FURY_CHECK(size > 0);
+  if (size == 1)
+    return get_latin1_char(u[0]);
+  const unsigned char limit = isAscii(u, size) ? 127 : 255;
+  PyObject *str = PyUnicode_New(size, limit);
+  if (str != nullptr) {
+    memcpy(PyUnicode_1BYTE_DATA(str), u, size);
+  }
+  return str;
+}
+
+// Builds a str from `size` (> 0, checked) UCS2 code units at `u`; returns a
+// new reference, or NULL when PyUnicode_New fails.
+PyObject *Fury_PyUnicode_FromUCS2(const uint16_t *u, Py_ssize_t size) {
+  FURY_CHECK(size > 0);
+  if (size > 1) {
+    const Py_UCS2 widest = getMaxValue(u, size);
+    PyObject *str = PyUnicode_New(size, widest);
+    if (str == nullptr) {
+      return NULL;
+    }
+    if (widest < 256) {
+      // Every unit fits in one byte, so narrow while copying.
+      copyArray(u, PyUnicode_1BYTE_DATA(str), size);
+    } else {
+      memcpy(PyUnicode_2BYTE_DATA(str), u, sizeof(Py_UCS2) * size);
+    }
+    return str;
+  }
+  const Py_UCS2 only = u[0];
+  if (only < 256) {
+    return get_latin1_char(only);
+  }
+  PyObject *single = PyUnicode_New(1, only);
+  if (single == nullptr) {
+    return NULL;
+  }
+  if (PyUnicode_KIND(single) == PyUnicode_2BYTE_KIND) {
+    PyUnicode_2BYTE_DATA(single)[0] = only;
+  } else {
+    FURY_CHECK(PyUnicode_KIND(single) == PyUnicode_4BYTE_KIND);
+    PyUnicode_4BYTE_DATA(single)[0] = only;
+  }
+  return single;
+}
+} // namespace fury
diff --git a/cpp/fury/python/pyunicode.h b/cpp/fury/python/pyunicode.h
new file mode 100644
index 0000000000..0f4ddeb793
--- /dev/null
+++ b/cpp/fury/python/pyunicode.h
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+#include "Python.h"
+#include "object.h"
+#include "pyport.h"
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+namespace fury {
+// str from `size` one-byte units (size > 0 is FURY_CHECKed); NULL on failure.
+PyObject *Fury_PyUnicode_FromUCS1(const char *u, Py_ssize_t size);
+// str from `size` UCS2 units (size > 0 is FURY_CHECKed); NULL on failure.
+PyObject *Fury_PyUnicode_FromUCS2(const uint16_t *u, Py_ssize_t size);
+
+} // namespace fury
diff --git a/cpp/fury/util/BUILD b/cpp/fury/util/BUILD
index 8f605dc75e..124825b9b3 100644
--- a/cpp/fury/util/BUILD
+++ b/cpp/fury/util/BUILD
@@ -62,4 +62,16 @@ cc_test(
         ":fury_util",
         "@com_google_googletest//:gtest",
     ],
-)
\ No newline at end of file
+)
+
+
+# No -mavx2 here: array_util_test.cc uses no intrinsics itself, and the flag
+# is rejected by non-x86 compilers even though array_util.cc supports NEON.
+cc_test(
+    name = "array_util_test",
+    srcs = ["array_util_test.cc"],
+    deps = [
+        ":fury_util",
+        "@com_google_googletest//:gtest",
+    ],
+)
diff --git a/cpp/fury/util/array_util.cc b/cpp/fury/util/array_util.cc
new file mode 100644
index 0000000000..1182e5c19a
--- /dev/null
+++ b/cpp/fury/util/array_util.cc
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "fury/util/array_util.h"
+
+namespace fury {
+#if defined(FURY_HAS_NEON)
+// Returns the largest element of arr[0..length), or 0 when length is 0.
+uint16_t getMaxValue(const uint16_t *arr, size_t length) {
+  if (length == 0) {
+    return 0;
+  }
+
+  // Eight running per-lane maxima, fed one full vector at a time.
+  uint16x8_t lane_max = vdupq_n_u16(0);
+  size_t pos = 0;
+  while (pos + 8 <= length) {
+    lane_max = vmaxq_u16(lane_max, vld1q_u16(arr + pos));
+    pos += 8;
+  }
+
+  // Collapse the lanes into one scalar.
+  uint16_t lanes[8];
+  vst1q_u16(lanes, lane_max);
+  uint16_t best = lanes[0];
+  for (int k = 1; k < 8; ++k) {
+    best = lanes[k] > best ? lanes[k] : best;
+  }
+
+  // Elements past the last full vector are compared one by one.
+  for (; pos < length; ++pos) {
+    if (arr[pos] > best) {
+      best = arr[pos];
+    }
+  }
+  return best;
+}
+
+// Narrows `length` uint16_t values at `from` to their low bytes in `to`.
+void copyArray(const uint16_t *from, uint8_t *to, size_t length) {
+  size_t pos = 0;
+  while (pos + 7 < length) {
+    // vmovn_u16 keeps the low half of each 16-bit lane.
+    vst1_u8(to + pos, vmovn_u16(vld1q_u16(from + pos)));
+    pos += 8;
+  }
+  // Scalar loop for the final partial group.
+  for (; pos < length; ++pos) {
+    to[pos] = static_cast<uint8_t>(from[pos]);
+  }
+}
+#elif defined(FURY_HAS_SSE2)
+// Returns the largest element of arr[0..length), or 0 when length is 0.
+// Sticks to SSE2: _mm_max_epu16 is an SSE4.1 intrinsic, so the unsigned max
+// is formed as cur + saturating_sub(max_val, cur).
+uint16_t getMaxValue(const uint16_t *arr, size_t length) {
+  if (length == 0) {
+    return 0;
+  }
+
+  __m128i max_val = _mm_setzero_si128(); // eight running per-lane maxima
+
+  size_t i = 0;
+  for (; i + 8 <= length; i += 8) {
+    __m128i cur = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&arr[i]));
+    // subs_epu16 clamps at 0, so the sum equals max(max_val, cur) per lane.
+    max_val = _mm_add_epi16(cur, _mm_subs_epu16(max_val, cur));
+  }
+
+  // Reduce the eight lanes to one scalar.
+  uint16_t temp[8];
+  _mm_storeu_si128(reinterpret_cast<__m128i *>(temp), max_val);
+  uint16_t max_sse = temp[0];
+  for (int j = 1; j < 8; j++) {
+    max_sse = temp[j] > max_sse ? temp[j] : max_sse;
+  }
+
+  // Fold in the tail that did not fill a whole vector.
+  for (; i < length; i++) {
+    max_sse = arr[i] > max_sse ? arr[i] : max_sse;
+  }
+  return max_sse;
+}
+
+// Narrows `length` uint16_t values at `from` to their low bytes in `to`.
+void copyArray(const uint16_t *from, uint8_t *to, size_t length) {
+  const __m128i low_byte = _mm_set1_epi16(0xFF);
+  size_t pos = 0;
+  while (pos + 7 < length) {
+    const auto *in = reinterpret_cast<const __m128i *>(from + pos);
+    const __m128i lanes = _mm_and_si128(_mm_loadu_si128(in), low_byte);
+    _mm_storel_epi64(reinterpret_cast<__m128i *>(to + pos),
+                     _mm_packus_epi16(lanes, lanes));
+    pos += 8;
+  }
+  for (; pos < length; ++pos) {
+    to[pos] = static_cast<uint8_t>(from[pos]);
+  }
+}
+#else
+// Scalar version: returns the largest element of arr[0..length), or 0 when
+// length is 0.
+uint16_t getMaxValue(const uint16_t *arr, size_t length) {
+  // Unsigned values are never below 0, so 0 doubles as the empty result.
+  uint16_t best = 0;
+  for (size_t idx = 0; idx < length; ++idx) {
+    if (best < arr[idx]) {
+      best = arr[idx];
+    }
+  }
+  return best;
+}
+
+// Scalar fallback (no NEON/SSE2): stores each value's low byte into `to`.
+void copyArray(const uint16_t *from, uint8_t *to, size_t length) {
+  for (const uint16_t *end = from + length; from != end; ++from, ++to) {
+    *to = static_cast<uint8_t>(*from);
+  }
+}
+#endif
+} // namespace fury
diff --git a/cpp/fury/util/array_util.h b/cpp/fury/util/array_util.h
new file mode 100644
index 0000000000..45eb0d33c7
--- /dev/null
+++ b/cpp/fury/util/array_util.h
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+#include "fury/util/platform.h"
+#include <cstdint>
+#include <stdlib.h>
+
+namespace fury {
+uint16_t getMaxValue(const uint16_t *arr, size_t length); // 0 if length == 0
+// Writes static_cast<uint8_t>(from[i]) to to[i] for every i < length.
+void copyArray(const uint16_t *from, uint8_t *to, size_t length);
+} // namespace fury
diff --git a/cpp/fury/util/array_util_test.cc b/cpp/fury/util/array_util_test.cc
new file mode 100644
index 0000000000..eb9eebd7f5
--- /dev/null
+++ b/cpp/fury/util/array_util_test.cc
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "fury/util/array_util.h"
+#include "gtest/gtest.h"
+
+namespace fury {
+TEST(GetMaxValueTest, HandlesEmptyArray) {
+  // With length 0 the pointer is never read, so nullptr is acceptable.
+  EXPECT_EQ(getMaxValue(nullptr, 0), 0);
+}
+
+TEST(GetMaxValueTest, HandlesSingleElementArray) {
+  const uint16_t single[] = {42};
+  EXPECT_EQ(getMaxValue(single, 1), 42);
+}
+
+TEST(GetMaxValueTest, HandlesSmallArray) {
+  const uint16_t values[] = {10, 20, 30, 40, 5};
+  EXPECT_EQ(getMaxValue(values, 5), 40);
+}
+
+TEST(GetMaxValueTest, HandlesLargeArray) {
+  constexpr size_t kCount = 1024;
+  uint16_t ascending[kCount]; // filled with 0, 1, ..., kCount - 1
+  for (size_t idx = 0; idx != kCount; ++idx) {
+    ascending[idx] = static_cast<uint16_t>(idx);
+  }
+  EXPECT_EQ(getMaxValue(ascending, kCount), 1023);
+}
+} // namespace fury
+
+int main(int argc, char **argv) { // explicit entry point for this gtest binary
+  ::testing::InitGoogleTest(&argc, argv); // hand the command line to gtest
+  return RUN_ALL_TESTS();
+}
diff --git a/cpp/fury/util/platform.h b/cpp/fury/util/platform.h
index 70a699bc20..9aa562a3db 100644
--- a/cpp/fury/util/platform.h
+++ b/cpp/fury/util/platform.h
@@ -17,16 +17,19 @@
  * under the License.
  */
 
+#pragma once
+
 #if defined(__x86_64__) || defined(_M_X64)
 #include <immintrin.h>
 #define FURY_HAS_IMMINTRIN
 #elif defined(__ARM_NEON) || defined(__ARM_NEON__)
 #include <arm_neon.h>
 #define FURY_HAS_NEON
-#elif defined(__SSE2__)
-#include <emmintrin.h>
-#define FURY_HAS_SSE2
 #elif defined(__riscv) && __riscv_vector
 #include <riscv_vector.h>
 #define FURY_HAS_RISCV_VECTOR
 #endif
+#if defined(__SSE2__) // own block: may be set together with a macro above
+#include <emmintrin.h>
+#define FURY_HAS_SSE2
+#endif
diff --git a/cpp/fury/util/string_util_test.cc b/cpp/fury/util/string_util_test.cc
index f57f75186f..080fd603ed 100644
--- a/cpp/fury/util/string_util_test.cc
+++ b/cpp/fury/util/string_util_test.cc
@@ -58,21 +58,34 @@ bool isAscii_BaseLine(const std::string &str) {
 TEST(StringUtilTest, TestisAsciiFunctions) {
   std::string testStr = generateRandomString(100000);
   auto start_time = std::chrono::high_resolution_clock::now();
-  bool result = isAscii_BaseLine(testStr);
+  bool result;
+  int c = 0; // true-result tally, logged so the loops are not dead code
+  for (size_t i = 0; i < 10000; i++) { // time 10000 baseline calls
+    result = isAscii_BaseLine(testStr);
+    if (result) {
+      c++;
+    }
+  }
+
   auto end_time = std::chrono::high_resolution_clock::now();
   auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
                       end_time - start_time)
                       .count();
   FURY_LOG(INFO) << "BaseLine Running Time: " << duration << " ns.";
-
+  FURY_LOG(DEBUG) << "Avoid compiler optimized loop " << c;
   start_time = std::chrono::high_resolution_clock::now();
-  result = isAscii(testStr);
+  for (size_t i = 0; i < 10000; i++) { // same repeat count for isAscii
+    result = isAscii(testStr);
+    if (result) {
+      c++;
+    }
+  }
   end_time = std::chrono::high_resolution_clock::now();
   duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end_time -
                                                                   start_time)
                  .count();
   FURY_LOG(INFO) << "Optimized Running Time: " << duration << " ns.";
-
+  FURY_LOG(DEBUG) << "Avoid compiler optimized loop " << c;
   EXPECT_TRUE(result);
 }
 
diff --git a/python/pyfury/_util.pyx b/python/pyfury/_util.pyx
index 3d0ac05fd9..d439ba8059 100644
--- a/python/pyfury/_util.pyx
+++ b/python/pyfury/_util.pyx
@@ -27,7 +27,8 @@ from libcpp.memory cimport shared_ptr, make_shared
 from libc.stdint cimport *
 from libcpp cimport bool as c_bool
 from pyfury.includes.libutil cimport(
-    CBuffer, AllocateBuffer, GetBit, SetBit, ClearBit, SetBitTo, CStatus, StatusCode, utf16HasSurrogatePairs
+    CBuffer, AllocateBuffer, GetBit, SetBit, ClearBit, SetBitTo, CStatus, StatusCode, utf16HasSurrogatePairs,
+    Fury_PyUnicode_FromUCS1, Fury_PyUnicode_FromUCS2
 )
 
 cdef int32_t max_buffer_size = 2 ** 31 - 1
@@ -573,12 +574,15 @@ cdef class Buffer:
         cdef uint64_t header = self.read_varuint64()
         cdef uint32_t size = header >> 2
         self.check_bound(self.reader_index, size)
+        if size == 0:
+            return ""
         cdef const char * buf = <const char *>(self.c_buffer.get().data() + self.reader_index)
         self.reader_index += size
         cdef uint32_t encoding = header & <uint32_t>0b11
         if encoding == 0:
             # PyUnicode_FromASCII
-            return PyUnicode_DecodeLatin1(buf, size, "strict")
+            return <unicode>Fury_PyUnicode_FromUCS1(buf, size)
+            # return PyUnicode_DecodeLatin1(buf, size, "strict")
         elif encoding == 1:
             if utf16HasSurrogatePairs(<const uint16_t *>buf, size >> 1):
                 return PyUnicode_DecodeUTF16(
@@ -588,7 +592,8 @@ cdef class Buffer:
                     &UTF16_LE,  # fury use little-endian
                 )
             else:
-                return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, buf, size >> 1)
+                # return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, buf, size >> 1)
+                return <unicode>Fury_PyUnicode_FromUCS2(<const uint16_t *>buf, size >> 1)
         else:
             return PyUnicode_DecodeUTF8(buf, size, "strict")
 
diff --git a/python/pyfury/includes/libutil.pxd b/python/pyfury/includes/libutil.pxd
index 72a640033d..b79287a659 100644
--- a/python/pyfury/includes/libutil.pxd
+++ b/python/pyfury/includes/libutil.pxd
@@ -19,6 +19,7 @@ from libc.stdint cimport *
 from libcpp cimport bool as c_bool
 from libcpp.memory cimport shared_ptr
 from libcpp.string cimport string as c_string
+from cpython cimport PyObject
 
 cdef extern from "fury/util/buffer.h" namespace "fury" nogil:
     cdef cppclass CStatus" fury::Status":
@@ -111,3 +112,8 @@ cdef extern from "fury/util/bit_util.h" namespace "fury::util" nogil:
 
 cdef extern from "fury/util/string_util.h" namespace "fury" nogil:
     c_bool utf16HasSurrogatePairs(uint16_t* data, size_t size)
+
+
+cdef extern from "fury/python/pyunicode.h" namespace "fury":  # no nogil: these call the Python C-API
+    object Fury_PyUnicode_FromUCS1(const char* u, Py_ssize_t size)  # new reference; NULL raises
+    object Fury_PyUnicode_FromUCS2(const uint16_t* u, Py_ssize_t size)  # new reference; NULL raises