From 8eb48fd2d0305eaf7ff6ccd87057c8b60ce21b91 Mon Sep 17 00:00:00 2001 From: epiphyte Date: Wed, 16 Oct 2019 22:20:52 +0000 Subject: [PATCH 1/2] Allow unmatched surrogates through when encoding structs. The real world is a messy place, don't die because of it. --- elasticsearch_async/transport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elasticsearch_async/transport.py b/elasticsearch_async/transport.py index f108a3e..12dcdf4 100644 --- a/elasticsearch_async/transport.py +++ b/elasticsearch_async/transport.py @@ -198,7 +198,7 @@ def perform_request(self, method, url, headers=None, params=None, body=None): if body is not None: try: - body = body.encode('utf-8') + body = body.encode('utf-8', 'surrogatepass') except (UnicodeDecodeError, AttributeError): # bytes/str - no need to re-encode pass From 22994bf33fcb0cd7cdc44cfc544fb3c3850de2b4 Mon Sep 17 00:00:00 2001 From: epiphyte Date: Fri, 18 Oct 2019 14:39:45 +0000 Subject: [PATCH 2/2] Also fix utf8 surrogatepass errors when reading data --- elasticsearch_async/connection.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/elasticsearch_async/connection.py b/elasticsearch_async/connection.py index 6d3c912..86fac61 100644 --- a/elasticsearch_async/connection.py +++ b/elasticsearch_async/connection.py @@ -96,7 +96,8 @@ def perform_request(self, method, url, params=None, body=None, timeout=None, ign try: with async_timeout.timeout(timeout or self.timeout, loop=self.loop): response = yield from self.session.request(method, url, data=body, headers=headers) - raw_data = yield from response.text() + raw_data = yield from response.content.read() + raw_data = raw_data.decode('utf-8', errors='surrogatepass') duration = self.loop.time() - start except asyncio.CancelledError: