Skip to content

Commit d72dca8

Browse files
tlonnytlonny
and
tlonny
authored
Allow dominate to work in async contexts (#187)
* Allow dominate to work in async contexts Using ContextVars to allow dominate to work within async contexts. Added unit tests to ensure code works as expected. * Small Fixes - Added .venv and .envrc to .gitignore (I use direnv and venv to keep my python environments isolated - I hope this is okay!) - Removed print statements I left in dom_tag during debugging - Replaced global incrementing int with UUID for contextvar ID generation - this zeroes the risk of race-hazards/collisions - _get_thread_context now returns a tuple vs. a hash of a tuple. Functionally not much changes - the underlying dictionary will still use the same hashing function but the only difference is that _if_ there is a collision, the dictionary will still be able to return the correct element --------- Co-authored-by: tlonny <tlonny@lonnycore.lonny.zone>
1 parent 4a66756 commit d72dca8

File tree

3 files changed

+109
-4
lines changed

3 files changed

+109
-4
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,5 @@ nosetests.xml
3939

4040
.idea
4141
.idea/
42+
.venv/
43+
.envrc

dominate/dom_tag.py

+32-4
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
from collections import defaultdict, namedtuple
2424
from functools import wraps
2525
import threading
26+
from asyncio import get_event_loop
27+
from uuid import uuid4
28+
from contextvars import ContextVar
2629

2730
try:
2831
# Python 3
@@ -37,19 +40,44 @@
3740
basestring = str
3841
unicode = str
3942

40-
4143
try:
4244
import greenlet
4345
except ImportError:
4446
greenlet = None
4547

48+
# We want dominate to work in async contexts - however, the problem is
49+
# when we bind a tag using "with", we set what is essentially a global variable.
50+
# If we are processing multiple documents at the same time, one context
51+
# can "overwrite" the "bound tag" of another - this can cause documents to
52+
# sort of bleed into one another...
53+
54+
# The solution is to use a ContextVar - which provides async context local storage.
55+
# We use this to store a unique ID for each async context. We then use this ID to
56+
# form the key (in _get_thread_context) that is used to index the _with_context defaultdict.
57+
# The presence of this key ensures that each async context has its own stack and doesn't conflict.
58+
async_context_id = ContextVar('async_context_id', default = None)
59+
60+
def _get_async_context_id():
61+
if async_context_id.get() is None:
62+
async_context_id.set(uuid4().hex)
63+
return async_context_id.get()
4664

4765
def _get_thread_context():
4866
context = [threading.current_thread()]
67+
# Tag extra content information with a name to make sure
68+
# a greenlet.getcurrent() == 1 doesn't get confused with a
69+
# _get_thread_context() == 1.
4970
if greenlet:
50-
context.append(greenlet.getcurrent())
51-
return hash(tuple(context))
52-
71+
context.append(("greenlet", greenlet.getcurrent()))
72+
73+
try:
74+
if get_event_loop().is_running():
75+
# Only add this extra information if we are actually in a running event loop
76+
context.append(("async", _get_async_context_id()))
77+
# A runtime error is raised if there is no async loop...
78+
except RuntimeError:
79+
pass
80+
return tuple(context)
5381

5482
class dom_tag(object):
5583
is_single = False # Tag does not require matching end tag (ex. <hr/>)

tests/test_dom_tag_async.py

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from asyncio import gather, run, Semaphore
2+
from dominate.dom_tag import async_context_id
3+
from textwrap import dedent
4+
5+
from dominate import tags
6+
7+
# To simulate sleep without making the tests take a hella long time to complete
8+
# let's use a pair of semaphores to explicitly control when our coroutines run.
9+
# The order of execution will be marked as comments below:
10+
def test_async_bleed():
11+
async def tag_routine_1(sem_1, sem_2):
12+
root = tags.div(id = 1) # [1]
13+
with root: # [2]
14+
sem_2.release() # [3]
15+
await sem_1.acquire() # [4]
16+
tags.div(id = 2) # [11]
17+
return str(root) # [12]
18+
19+
async def tag_routine_2(sem_1, sem_2):
20+
await sem_2.acquire() # [5]
21+
root = tags.div(id = 3) # [6]
22+
with root: # [7]
23+
tags.div(id = 4) # [8]
24+
sem_1.release() # [9]
25+
return str(root) # [10]
26+
27+
async def merge():
28+
sem_1 = Semaphore(0)
29+
sem_2 = Semaphore(0)
30+
return await gather(
31+
tag_routine_1(sem_1, sem_2),
32+
tag_routine_2(sem_1, sem_2)
33+
)
34+
35+
# Set this test up for failure - pre-set the context to a non-None value.
36+
# As it is already set, _get_async_context_id will not set it to a new, unique value
37+
# and thus we won't be able to differentiate between the two contexts. This essentially simulates
38+
# the behavior before our async fix was implemented (the bleed):
39+
async_context_id.set(0)
40+
tag_1, tag_2 = run(merge())
41+
42+
# This looks wrong - but it's what we would expect if we don't
43+
# properly handle async...
44+
assert tag_1 == dedent("""\
45+
<div id="1">
46+
<div id="3">
47+
<div id="4"></div>
48+
</div>
49+
<div id="2"></div>
50+
</div>
51+
""").strip()
52+
53+
assert tag_2 == dedent("""\
54+
<div id="3">
55+
<div id="4"></div>
56+
</div>
57+
""").strip()
58+
59+
# Okay, now let's do it right - let's clear the context. Now when each async function
60+
# calls _get_async_context_id, it will get a unique value and we can differentiate.
61+
async_context_id.set(None)
62+
tag_1, tag_2 = run(merge())
63+
64+
# Ah, much better...
65+
assert tag_1 == dedent("""\
66+
<div id="1">
67+
<div id="2"></div>
68+
</div>
69+
""").strip()
70+
71+
assert tag_2 == dedent("""\
72+
<div id="3">
73+
<div id="4"></div>
74+
</div>
75+
""").strip()

0 commit comments

Comments
 (0)