From 124c2c30684fbdb6c9139c36e3ff409e0af9de77 Mon Sep 17 00:00:00 2001
From: Jim Crist-Harif <jcristharif@gmail.com>
Date: Fri, 10 Sep 2021 13:31:25 -0500
Subject: [PATCH] Don't drop the GIL during `uv_run`

Dropping the GIL while performing non-blocking IO can be detrimental for
some server workloads. Consider the case where a single Python process
is doing a mix of asyncio and compute bound tasks (running in background
threads). If the compute bound tasks don't drop the GIL (or only drop it
for a fraction of the time), then the background threads may acquire and
hold the GIL during every call to `uv_run` (which should return quickly,
since all socket operations are non-blocking). This can tank IO
performance (for more information, see
https://bugs.python.org/issue7946).

For example, running the enclosed echoserver benchmark as follows:

```
$ python echoserver.py --uvloop --buffered --proto --gil
```

On master:

```
$ python echoclient.py --num 1000
will connect to: ('127.0.0.1', 25000)
Sending 1000 messages
Sending 1000 messages
Sending 1000 messages
3000 in 19.91612672805786
150.63169867128815 requests/sec
```

This PR:

```
$ python echoclient.py
will connect to: ('127.0.0.1', 25000)
Sending 200000 messages
Sending 200000 messages
Sending 200000 messages
600000 in 9.143043279647827
65623.66398675856 requests/sec
```

This change may not be beneficial for all workloads. This will
prioritize waiting IO over waiting background compute threads. I suspect
for a server context this is usually what you'd want (and is what we
want for dask), but other users may differ. If needed, we could make
this a configurable setting on the loop without a ton of effort.
---
 examples/bench/echoserver.py | 13 +++++++++++++
 uvloop/loop.pyx              |  3 +--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/examples/bench/echoserver.py b/examples/bench/echoserver.py
index 906ca85a..5890a5d3 100644
--- a/examples/bench/echoserver.py
+++ b/examples/bench/echoserver.py
@@ -5,6 +5,7 @@
 import pathlib
 import socket
 import ssl
+import threading
 
 
 PRINT = 0
@@ -107,6 +108,7 @@ async def print_debug(loop):
     parser.add_argument('--print', default=False, action='store_true')
     parser.add_argument('--ssl', default=False, action='store_true')
     parser.add_argument('--buffered', default=False, action='store_true')
+    parser.add_argument('--gil', default=False, action='store_true')
     args = parser.parse_args()
 
     if args.uvloop:
@@ -156,6 +158,17 @@ async def print_debug(loop):
             server_context.check_hostname = False
         server_context.verify_mode = ssl.CERT_NONE
 
+    if args.gil:
+        print("adding GIL contention")
+
+        def spin():
+            while True:
+                pass
+
+        t = threading.Thread(target=spin)
+        t.daemon = True
+        t.start()
+
     if args.streams:
         if args.proto:
             print('cannot use --stream and --proto simultaneously')
diff --git a/uvloop/loop.pyx b/uvloop/loop.pyx
index d9b5aaac..77b843c0 100644
--- a/uvloop/loop.pyx
+++ b/uvloop/loop.pyx
@@ -494,8 +494,7 @@ cdef class Loop:
         # never deallocate during the run -- so we do some
         # manual refs management.
         Py_INCREF(self)
-        with nogil:
-            err = uv.uv_run(self.uvloop, mode)
+        err = uv.uv_run(self.uvloop, mode)
         Py_DECREF(self)
 
         if err < 0: