diff --git a/Data/Aeson/Parser/Internal.hs b/Data/Aeson/Parser/Internal.hs
index 63393c14c..a506a976a 100644
--- a/Data/Aeson/Parser/Internal.hs
+++ b/Data/Aeson/Parser/Internal.hs
@@ -34,35 +34,20 @@ module Data.Aeson.Parser.Internal
 import Prelude ()
 import Prelude.Compat
 
-import Control.Monad.IO.Class (liftIO)
 import Data.Aeson.Types.Internal (IResult(..), JSONPath, Result(..), Value(..))
 import Data.Attoparsec.ByteString.Char8 (Parser, char, endOfInput, scientific,
                                          skipSpace, string)
-import Data.Bits ((.|.), shiftL)
-import Data.ByteString.Internal (ByteString(..))
-import Data.Char (chr)
 import Data.Text (Text)
-import Data.Text.Encoding (decodeUtf8')
-import Data.Text.Internal.Encoding.Utf8 (ord2, ord3, ord4)
-import Data.Text.Internal.Unsafe.Char (ord)
-import Data.Vector as Vector (Vector, empty, fromList, reverse)
-import Data.Word (Word8)
-import Foreign.ForeignPtr (withForeignPtr)
-import Foreign.Marshal.Utils (copyBytes)
-import Foreign.Ptr (Ptr, minusPtr, plusPtr)
-import Foreign.Storable (poke)
-import System.IO.Unsafe (unsafePerformIO)
+import Data.Vector as Vector (Vector, empty, fromListN, reverse)
 import qualified Data.Attoparsec.ByteString as A
 import qualified Data.Attoparsec.Lazy as L
-import qualified Data.Attoparsec.Zepto as Z
 import qualified Data.ByteString as B
-import qualified Data.ByteString.Internal as B
 import qualified Data.ByteString.Lazy as L
-import qualified Data.ByteString.Unsafe as B
 import qualified Data.HashMap.Strict as H
+import Data.Aeson.Parser.Unescape
 
 #if MIN_VERSION_ghc_prim(0,3,1)
-import GHC.Base (Int#, (==#), isTrue#, orI#, word2Int#)
+import GHC.Base (Int#, (==#), isTrue#, word2Int#)
 import GHC.Word (Word8(W8#))
 #endif
 
@@ -125,16 +110,18 @@ objectValues str val = do
   w <- A.peekWord8'
   if w == CLOSE_CURLY
     then A.anyWord8 >> return H.empty
-    else loop H.empty
+    else loop []
  where
-  loop m0 = do
+  -- Why use acc pattern here, you may ask? because 'H.fromList' use 'unsafeInsert'
+  -- and it's much faster because it's doing in place update to the 'HashMap'!
+  loop acc = do
     k <- str <* skipSpace <* char ':'
     v <- val <* skipSpace
-    let !m = H.insert k v m0
     ch <- A.satisfy $ \w -> w == COMMA || w == CLOSE_CURLY
+    let acc' = (k, v) : acc
     if ch == COMMA
-      then skipSpace >> loop m
-      else return m
+      then skipSpace >> loop acc'
+      else return (H.fromList acc')
 {-# INLINE objectValues #-}
 
 array_ :: Parser Value
@@ -151,14 +138,14 @@ arrayValues val = do
   w <- A.peekWord8'
   if w == CLOSE_SQUARE
     then A.anyWord8 >> return Vector.empty
-    else loop []
+    else loop [] 1
   where
-    loop acc = do
+    loop acc !len = do
       v <- val <* skipSpace
       ch <- A.satisfy $ \w -> w == COMMA || w == CLOSE_SQUARE
       if ch == COMMA
-        then skipSpace >> loop (v:acc)
-        else return (Vector.reverse (Vector.fromList (v:acc)))
+        then skipSpace >> loop (v:acc) (len+1)
+        else return (Vector.reverse (Vector.fromListN len (v:acc)))
 {-# INLINE arrayValues #-}
 
 -- | Parse any JSON value.  You should usually 'json' in preference to
@@ -214,101 +201,30 @@ jstring = A.word8 DOUBLE_QUOTE *> jstring_
 jstring_ :: Parser Text
 {-# INLINE jstring_ #-}
 jstring_ = {-# SCC "jstring_" #-} do
-  (s, fin) <- A.runScanner startState go
-  _ <- A.anyWord8
-  s1 <- if isEscaped fin
-        then case unescape s of
-               Right r  -> return r
-               Left err -> fail err
-        else return s
-  case decodeUtf8' s1 of
+  s <- A.scan startState go <* A.anyWord8
+  case unescapeText s of
     Right r  -> return r
     Left err -> fail $ show err
  where
 #if MIN_VERSION_ghc_prim(0,3,1)
-    isEscaped (S _ escaped) = isTrue# escaped
-    startState              = S 0# 0#
-    go (S a b) (W8# c)
-      | isTrue# a                     = Just (S 0# b)
+    startState              = S 0#
+    go (S a) (W8# c)
+      | isTrue# a                     = Just (S 0#)
       | isTrue# (word2Int# c ==# 34#) = Nothing   -- double quote
       | otherwise = let a' = word2Int# c ==# 92#  -- backslash
-                    in Just (S a' (orI# a' b))
+                    in Just (S a')
 
-data S = S Int# Int#
+data S = S Int#
 #else
-    isEscaped (S _ escaped) = escaped
-    startState              = S False False
-    go (S a b) c
-      | a                  = Just (S False b)
+    startState              = False
+    go a c
+      | a                  = Just False
       | c == DOUBLE_QUOTE  = Nothing
       | otherwise = let a' = c == backslash
-                    in Just (S a' (a' || b))
+                    in Just a'
       where backslash = BACKSLASH
-
-data S = S !Bool !Bool
 #endif
 
-unescape :: ByteString -> Either String ByteString
-unescape s = unsafePerformIO $ do
-  let len = B.length s
-  fp <- B.mallocByteString len
-  -- We perform no bounds checking when writing to the destination
-  -- string, as unescaping always makes it shorter than the source.
-  withForeignPtr fp $ \ptr -> do
-    ret <- Z.parseT (go ptr) s
-    case ret of
-      Left err -> return (Left err)
-      Right p -> do
-        let newlen = p `minusPtr` ptr
-            slop = len - newlen
-        Right <$> if slop >= 128 && slop >= len `quot` 4
-                  then B.create newlen $ \np -> copyBytes np ptr newlen
-                  else return (PS fp 0 newlen)
- where
-  go ptr = do
-    h <- Z.takeWhile (/=BACKSLASH)
-    let rest = do
-          start <- Z.take 2
-          let !slash = B.unsafeHead start
-              !t = B.unsafeIndex start 1
-              escape = case B.elemIndex t "\"\\/ntbrfu" of
-                         Just i -> i
-                         _      -> 255
-          if slash /= BACKSLASH || escape == 255
-            then fail "invalid JSON escape sequence"
-            else
-            if t /= 117 -- 'u'
-              then copy h ptr >>= word8 (B.unsafeIndex mapping escape) >>= go
-              else do
-                   a <- hexQuad
-                   if a < 0xd800 || a > 0xdfff
-                     then copy h ptr >>= charUtf8 (chr a) >>= go
-                     else do
-                       b <- Z.string "\\u" *> hexQuad
-                       if a <= 0xdbff && b >= 0xdc00 && b <= 0xdfff
-                         then let !c = ((a - 0xd800) `shiftL` 10) +
-                                       (b - 0xdc00) + 0x10000
-                              in copy h ptr >>= charUtf8 (chr c) >>= go
-                         else fail "invalid UTF-16 surrogates"
-    done <- Z.atEnd
-    if done
-      then copy h ptr
-      else rest
-  mapping = "\"\\/\n\t\b\r\f"
-
-hexQuad :: Z.ZeptoT IO Int
-hexQuad = do
-  s <- Z.take 4
-  let hex n | w >= C_0 && w <= C_9 = w - C_0
-            | w >= C_a && w <= C_f = w - 87
-            | w >= C_A && w <= C_F = w - 55
-            | otherwise          = 255
-        where w = fromIntegral $ B.unsafeIndex s n
-      a = hex 0; b = hex 1; c = hex 2; d = hex 3
-  if (a .|. b .|. c .|. d) /= 255
-    then return $! d .|. (c `shiftL` 4) .|. (b `shiftL` 8) .|. (a `shiftL` 12)
-    else fail "invalid hex escape"
-
 decodeWith :: Parser Value -> (Value -> Result a) -> L.ByteString -> Maybe a
 decodeWith p to s =
     case L.parse p s of
@@ -372,38 +288,3 @@ jsonEOF = json <* skipSpace <* endOfInput
 -- end-of-input.  See also: 'json''.
 jsonEOF' :: Parser Value
 jsonEOF' = json' <* skipSpace <* endOfInput
-
-word8 :: Word8 -> Ptr Word8 -> Z.ZeptoT IO (Ptr Word8)
-word8 w ptr = do
-  liftIO $ poke ptr w
-  return $! ptr `plusPtr` 1
-
-copy :: ByteString -> Ptr Word8 -> Z.ZeptoT IO (Ptr Word8)
-copy (PS fp off len) ptr =
-  liftIO . withForeignPtr fp $ \src -> do
-    copyBytes ptr (src `plusPtr` off) len
-    return $! ptr `plusPtr` len
-
-charUtf8 :: Char -> Ptr Word8 -> Z.ZeptoT IO (Ptr Word8)
-charUtf8 ch ptr
-  | ch < '\x80'   = liftIO $ do
-                       poke ptr (fromIntegral (ord ch))
-                       return $! ptr `plusPtr` 1
-  | ch < '\x800'  = liftIO $ do
-                       let (a,b) = ord2 ch
-                       poke ptr a
-                       poke (ptr `plusPtr` 1) b
-                       return $! ptr `plusPtr` 2
-  | ch < '\xffff' = liftIO $ do
-                       let (a,b,c) = ord3 ch
-                       poke ptr a
-                       poke (ptr `plusPtr` 1) b
-                       poke (ptr `plusPtr` 2) c
-                       return $! ptr `plusPtr` 3
-  | otherwise     = liftIO $ do
-                       let (a,b,c,d) = ord4 ch
-                       poke ptr a
-                       poke (ptr `plusPtr` 1) b
-                       poke (ptr `plusPtr` 2) c
-                       poke (ptr `plusPtr` 3) d
-                       return $! ptr `plusPtr` 4
diff --git a/Data/Aeson/Parser/Unescape.hs b/Data/Aeson/Parser/Unescape.hs
new file mode 100644
index 000000000..d3682fd16
--- /dev/null
+++ b/Data/Aeson/Parser/Unescape.hs
@@ -0,0 +1,51 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+{-# LANGUAGE MagicHash                #-}
+{-# LANGUAGE UnliftedFFITypes         #-}
+
+module Data.Aeson.Parser.Unescape (
+  unescapeText
+) where
+
+import           Control.Exception          (evaluate, throw, try)
+import           Control.Monad.ST.Unsafe    (unsafeIOToST, unsafeSTToIO)
+import           Data.ByteString            as B
+import           Data.ByteString.Internal   as B hiding (c2w)
+import qualified Data.Text.Array            as A
+import           Data.Text.Encoding.Error   (UnicodeException (..))
+import           Data.Text.Internal         (Text (..))
+import           Data.Text.Internal.Private (runText)
+import           Data.Text.Unsafe           (unsafeDupablePerformIO)
+import           Data.Word                  (Word8)
+import           Foreign.C.Types            (CInt (..), CSize (..))
+import           Foreign.ForeignPtr         (withForeignPtr)
+import           Foreign.Marshal.Utils      (with)
+import           Foreign.Ptr                (Ptr, plusPtr)
+import           Foreign.Storable           (peek)
+import           GHC.Base                   (MutableByteArray#)
+
+foreign import ccall unsafe "_js_decode_string" c_js_decode
+    :: MutableByteArray# s -> Ptr CSize
+    -> Ptr Word8 -> Ptr Word8 -> IO CInt
+
+unescapeText' :: ByteString -> Text
+unescapeText' (PS fp off len) = runText $ \done -> do
+  let go dest = withForeignPtr fp $ \ptr ->
+        with (0::CSize) $ \destOffPtr -> do
+          let end = ptr `plusPtr` (off + len)
+              loop curPtr = do
+                res <- c_js_decode (A.maBA dest) destOffPtr curPtr end
+                case res of
+                  0 -> do
+                    n <- peek destOffPtr
+                    unsafeSTToIO (done dest (fromIntegral n))
+                  _ ->
+                    throw (DecodeError desc Nothing)
+          loop (ptr `plusPtr` off)
+  (unsafeIOToST . go) =<< A.new len
+ where
+  desc = "Data.Text.Internal.Encoding.decodeUtf8: Invalid UTF-8 stream"
+{-# INLINE unescapeText' #-}
+
+unescapeText :: ByteString -> Either UnicodeException Text
+unescapeText = unsafeDupablePerformIO . try . evaluate . unescapeText'
+{-# INLINE unescapeText #-}
diff --git a/aeson.cabal b/aeson.cabal
index 273660c74..05bded1c1 100644
--- a/aeson.cabal
+++ b/aeson.cabal
@@ -52,6 +52,7 @@ extra-source-files:
     examples/*.hs
     examples/Twitter/*.hs
     include/*.h
+    cbits/*.c
 
 flag developer
   description: operate in developer mode
@@ -86,6 +87,7 @@ library
     Data.Aeson.Encoding.Builder
     Data.Aeson.Internal.Functions
     Data.Aeson.Parser.Internal
+    Data.Aeson.Parser.Unescape
     Data.Aeson.Parser.Time
     Data.Aeson.Types.FromJSON
     Data.Aeson.Types.Generic
@@ -136,6 +138,7 @@ library
   ghc-options: -O2 -Wall
 
   include-dirs: include
+  c-sources:  cbits/unescape_string.c
 
 test-suite tests
   default-language: Haskell2010
diff --git a/benchmarks/aeson-benchmarks.cabal b/benchmarks/aeson-benchmarks.cabal
index b6cee5b97..6a2ce9ea1 100644
--- a/benchmarks/aeson-benchmarks.cabal
+++ b/benchmarks/aeson-benchmarks.cabal
@@ -11,7 +11,7 @@ flag bytestring-builder
 
 library
   hs-source-dirs: .. .
-
+  c-sources:  ../cbits/unescape_string.c
   exposed-modules:
     Data.Aeson
     Data.Aeson.Encoding
diff --git a/cbits/unescape_string.c b/cbits/unescape_string.c
new file mode 100644
index 000000000..2d3120946
--- /dev/null
+++ b/cbits/unescape_string.c
@@ -0,0 +1,150 @@
+// Copyright (c) 2008-2009 Bjoern Hoehrmann
+// Copyright (c) 2015, Ondrej Palkovsky
+// Copyright (c) 2016, Winterland
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 12
+
+static const uint8_t utf8d[] = {
+  // The first part of the table maps bytes to character classes that
+  // to reduce the size of the transition table and create bitmasks.
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+
+  // The second part is a transition table that maps a combination
+  // of a state of the automaton and a character class to a state.
+   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+  12,36,12,12,12,12,12,12,12,12,12,12,
+};
+
+static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
+  uint32_t type = utf8d[byte];
+
+  *codep = (*state != UTF8_ACCEPT) ?
+    (byte & 0x3fu) | (*codep << 6) :
+    (0xff >> type) & (byte);
+
+  *state = utf8d[256 + *state + type];
+  return *state;
+}
+
+static inline uint16_t decode_hex(uint32_t c)
+{
+  if (c >= '0' && c <= '9')      return c - '0';
+  else if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+  else if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+  return 0xFFFF; // Should not happen
+}
+
+// Decode, return non-zero value on error
+int _js_decode_string(uint16_t *const dest, size_t *destoff,
+                  const uint8_t *s, const uint8_t *const srcend)
+{
+  uint16_t *d = dest + *destoff;
+  uint32_t state = 0;
+  uint32_t codepoint;
+
+  uint8_t surrogate = 0;
+  uint16_t temp_hex = 0;
+  uint16_t unidata;
+
+  // Optimized version of dispatch when just an ASCII char is expected
+  #define DISPATCH_ASCII(label) {\
+    if (s >= srcend) {\
+      return -1;\
+    }\
+    codepoint = *s++;\
+    goto label;\
+  }
+
+  standard:
+    // Test end of stream
+    while (s < srcend) {
+        if (decode(&state, &codepoint, *s++) != UTF8_ACCEPT) {
+          if (state == UTF8_REJECT) { return -1; }
+          continue;
+        }
+
+        if (codepoint == '\\')
+          DISPATCH_ASCII(backslash)
+        else if (codepoint <= 0xffff)
+          *d++ = (uint16_t) codepoint;
+        else {
+          *d++ = (uint16_t) (0xD7C0 + (codepoint >> 10));
+          *d++ = (uint16_t) (0xDC00 + (codepoint & 0x3FF));
+        }
+    }
+    *destoff = d - dest;
+    // Exit point
+    return (state != UTF8_ACCEPT);
+  backslash:
+    switch (codepoint) {
+      case '"':
+      case '\\':
+      case '/':
+        *d++ = (uint16_t) codepoint;
+        goto standard;
+        break;
+      case 'b': *d++ = '\b';goto standard;
+      case 'f': *d++ = '\f';goto standard;
+      case 'n': *d++ = '\n';goto standard;
+      case 'r': *d++ = '\r';goto standard;
+      case 't': *d++ = '\t';goto standard;
+      case 'u': DISPATCH_ASCII(unicode1);;break;
+      default:
+        return -1;
+    }
+  unicode1:
+    temp_hex = decode_hex(codepoint);
+    if (temp_hex == 0xFFFF) { return -1; }
+    else unidata = temp_hex << 12;
+    DISPATCH_ASCII(unicode2);
+  unicode2:
+    temp_hex = decode_hex(codepoint);
+    if (temp_hex == 0xFFFF) { return -1; }
+    else unidata |= temp_hex << 8;
+    DISPATCH_ASCII(unicode3);
+  unicode3:
+    temp_hex = decode_hex(codepoint);
+    if (temp_hex == 0xFFFF) { return -1; }
+    else unidata |= temp_hex << 4;
+    DISPATCH_ASCII(unicode4);
+  unicode4:
+    temp_hex = decode_hex(codepoint);
+    if (temp_hex == 0xFFFF) { return -1; }
+    else unidata |= temp_hex;
+    *d++ = (uint16_t) unidata;
+
+    if (surrogate) {
+      if (unidata <= 0xDC00 || unidata >= 0xDFFF) // is not low surrogate
+        return -1;
+      surrogate = 0;
+    } else if (unidata >= 0xD800 && unidata <= 0xDBFF ) { // is high surrogate
+        surrogate = 1;
+        DISPATCH_ASCII(surrogate1);
+    } else if (unidata >= 0xDC00 && unidata <= 0xDFFF) { // is low surrogate
+        return -1;
+    }
+    goto standard;
+  surrogate1:
+    if (codepoint != '\\') { return -1; }
+    DISPATCH_ASCII(surrogate2)
+  surrogate2:
+    if (codepoint != 'u') { return -1; }
+    DISPATCH_ASCII(unicode1)
+}
+