Skip to content

Commit d6fb953

Browse files
tylerjharden authored and honzakral committed
Fixes non UTF-8 surrogateescapes (#612)
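Fixes non UTF-8 surrogateescapes. Surrogate escapes (lone surrogate code points, which cannot be encoded as strict UTF-8) in a Unicode request body are now encoded with the 'surrogatepass' error handler when encountered, instead of raising an encoding error that breaks the transport layer. Fixes #611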
1 parent 9320ee7 commit d6fb953

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

elasticsearch/transport.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def perform_request(self, method, url, params=None, body=None):
292292

293293
if body is not None:
294294
try:
295-
body = body.encode('utf-8')
295+
body = body.encode('utf-8', 'surrogatepass')
296296
except (UnicodeDecodeError, AttributeError):
297297
# bytes/str - no need to re-encode
298298
pass

test_elasticsearch/test_transport.py

+7
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,13 @@ def test_body_bytes_get_passed_untouched(self):
105105
self.assertEquals(1, len(t.get_connection().calls))
106106
self.assertEquals(('GET', '/', None, body), t.get_connection().calls[0][0])
107107

108+
def test_body_surrogates_replaced_encoded_into_bytes(self):
109+
t = Transport([{}], connection_class=DummyConnection)
110+
111+
t.perform_request('GET', '/', body='你好\uda6a')
112+
self.assertEquals(1, len(t.get_connection().calls))
113+
self.assertEquals(('GET', '/', None, b'\xe4\xbd\xa0\xe5\xa5\xbd\xed\xa9\xaa'), t.get_connection().calls[0][0])
114+
108115
def test_kwargs_passed_on_to_connections(self):
109116
t = Transport([{'host': 'google.com'}], port=123)
110117
self.assertEquals(1, len(t.connection_pool.connections))

0 commit comments

Comments
 (0)