Skip to content

Commit

Permalink
drm/i915: Coordinate i915_active with its own mutex
Browse files Browse the repository at this point in the history
Forgo the struct_mutex serialisation for i915_active, and interpose its
own mutex handling for active/retire.

This is a multi-layered sleight-of-hand. First, we had to ensure that no
active/retire callbacks accidentally inverted the mutex ordering rules,
nor assumed that they were themselves serialised by struct_mutex. More
challenging, though, is the rule for updating elements of the active
rbtree. Instead of the whole i915_active now being serialised by
struct_mutex, allocations/rotations of the tree are serialised by the
i915_active.mutex and individual nodes are serialised by the caller
using the i915_timeline.mutex (we need to use nested spinlocks to
interact with the dma_fence callback lists).

The pain point here is that instead of a single mutex around execbuf, we
now have to take a mutex for each active tracker (one for each vma,
context, etc) and a couple of spinlocks for each fence update. The improvement in
fine grained locking allowing for multiple concurrent clients
(eventually!) should be worth it in typical loads.

v2: Add some comments that barely elucidate anything :(

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-6-chris@chris-wilson.co.uk
  • Loading branch information
ickle committed Oct 4, 2019
1 parent 274cbf2 commit b1e3177
Show file tree
Hide file tree
Showing 24 changed files with 298 additions and 572 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/display/intel_frontbuffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
front->obj = obj;
kref_init(&front->ref);
atomic_set(&front->bits, 0);
i915_active_init(i915, &front->write,
i915_active_init(&front->write,
frontbuffer_active,
i915_active_may_sleep(frontbuffer_retire));

Expand Down
3 changes: 1 addition & 2 deletions drivers/gpu/drm/i915/display/intel_overlay.c
Original file line number Diff line number Diff line change
Expand Up @@ -1360,8 +1360,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
overlay->contrast = 75;
overlay->saturation = 146;

i915_active_init(dev_priv,
&overlay->last_flip,
i915_active_init(&overlay->last_flip,
NULL, intel_overlay_last_flip_retire);

ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
Expand Down
4 changes: 1 addition & 3 deletions drivers/gpu/drm/i915/gem/i915_gem_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -868,20 +868,18 @@ static int context_barrier_task(struct i915_gem_context *ctx,
void (*task)(void *data),
void *data)
{
struct drm_i915_private *i915 = ctx->i915;
struct context_barrier_task *cb;
struct i915_gem_engines_iter it;
struct intel_context *ce;
int err = 0;

lockdep_assert_held(&i915->drm.struct_mutex);
GEM_BUG_ON(!task);

cb = kmalloc(sizeof(*cb), GFP_KERNEL);
if (!cb)
return -ENOMEM;

i915_active_init(i915, &cb->base, NULL, cb_retire);
i915_active_init(&cb->base, NULL, cb_retire);
err = i915_active_acquire(&cb->base);
if (err) {
kfree(cb);
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/gem/i915_gem_object_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define __I915_GEM_OBJECT_TYPES_H__

#include <drm/drm_gem.h>
#include <uapi/drm/i915_drm.h>

#include "i915_active.h"
#include "i915_selftest.h"
Expand Down
9 changes: 3 additions & 6 deletions drivers/gpu/drm/i915/gem/i915_gem_pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,11 @@ static void call_idle_barriers(struct intel_engine_cs *engine)
struct llist_node *node, *next;

llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
struct i915_active_request *active =
struct dma_fence_cb *cb =
container_of((struct list_head *)node,
typeof(*active), link);
typeof(*cb), node);

INIT_LIST_HEAD(&active->link);
RCU_INIT_POINTER(active->request, NULL);

active->retire(active, NULL);
cb->func(NULL, cb);
}
}

Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/i915/gt/intel_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ intel_context_init(struct intel_context *ce,

mutex_init(&ce->pin_mutex);

i915_active_init(ctx->i915, &ce->active,
i915_active_init(&ce->active,
__intel_context_active, __intel_context_retire);
}

Expand Down Expand Up @@ -307,7 +307,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
return err;

/* Queue this switch after current activity by this context. */
err = i915_active_request_set(&tl->last_request, rq);
err = i915_active_fence_set(&tl->last_request, rq);
mutex_unlock(&tl->mutex);
if (err)
return err;
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gt/intel_engine_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return ERR_PTR(-ENOMEM);

node->pool = pool;
i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
i915_active_init(&node->active, pool_active, pool_retire);

obj = i915_gem_object_create_internal(engine->i915, sz);
if (IS_ERR(obj)) {
Expand Down
10 changes: 5 additions & 5 deletions drivers/gpu/drm/i915/gt/intel_reset.c
Original file line number Diff line number Diff line change
Expand Up @@ -844,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
*/
spin_lock_irqsave(&timelines->lock, flags);
list_for_each_entry(tl, &timelines->active_list, link) {
struct i915_request *rq;
struct dma_fence *fence;

rq = i915_active_request_get_unlocked(&tl->last_request);
if (!rq)
fence = i915_active_fence_get(&tl->last_request);
if (!fence)
continue;

spin_unlock_irqrestore(&timelines->lock, flags);
Expand All @@ -859,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
* (I915_FENCE_TIMEOUT) so this wait should not be unbounded
* in the worst case.
*/
dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
i915_request_put(rq);
dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
dma_fence_put(fence);

/* Restart iteration after dropping lock */
spin_lock_irqsave(&timelines->lock, flags);
Expand Down
7 changes: 3 additions & 4 deletions drivers/gpu/drm/i915/gt/intel_timeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
cl->hwsp = hwsp;
cl->vaddr = page_pack_bits(vaddr, cacheline);

i915_active_init(hwsp->gt->i915, &cl->active,
__cacheline_active, __cacheline_retire);
i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);

return cl;
}
Expand Down Expand Up @@ -255,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,

mutex_init(&timeline->mutex);

INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
INIT_LIST_HEAD(&timeline->requests);

i915_syncmap_init(&timeline->sync);
Expand Down Expand Up @@ -443,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
* free it after the current request is retired, which ensures that
* all writes into the cacheline from previous requests are complete.
*/
err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
if (err)
goto err_cacheline;

Expand Down
9 changes: 5 additions & 4 deletions drivers/gpu/drm/i915/gt/intel_timeline_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,13 @@ struct intel_timeline {
*/
struct list_head requests;

/* Contains an RCU guarded pointer to the last request. No reference is
/*
* Contains an RCU guarded pointer to the last request. No reference is
* held to the request, users must carefully acquire a reference to
* the request using i915_active_request_get_request_rcu(), or hold the
* struct_mutex.
* the request using i915_active_fence_get(), or manage the RCU
* protection themselves (cf the i915_active_fence API).
*/
struct i915_active_request last_request;
struct i915_active_fence last_request;

/**
* We track the most recent seqno that we wait on in every context so
Expand Down
16 changes: 6 additions & 10 deletions drivers/gpu/drm/i915/gt/selftest_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,24 +47,20 @@ static int context_sync(struct intel_context *ce)

mutex_lock(&tl->mutex);
do {
struct i915_request *rq;
struct dma_fence *fence;
long timeout;

rcu_read_lock();
rq = rcu_dereference(tl->last_request.request);
if (rq)
rq = i915_request_get_rcu(rq);
rcu_read_unlock();
if (!rq)
fence = i915_active_fence_get(&tl->last_request);
if (!fence)
break;

timeout = i915_request_wait(rq, 0, HZ / 10);
timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
if (timeout < 0)
err = timeout;
else
i915_request_retire_upto(rq);
i915_request_retire_upto(to_request(fence));

i915_request_put(rq);
dma_fence_put(fence);
} while (!err);
mutex_unlock(&tl->mutex);

Expand Down
10 changes: 7 additions & 3 deletions drivers/gpu/drm/i915/gt/selftest_lrc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1172,9 +1172,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
if (!rq)
return NULL;

INIT_LIST_HEAD(&rq->active_list);
rq->engine = engine;

spin_lock_init(&rq->lock);
INIT_LIST_HEAD(&rq->fence.cb_list);
rq->fence.lock = &rq->lock;
rq->fence.ops = &i915_fence_ops;

i915_sched_node_init(&rq->sched);

/* mark this request as permanently incomplete */
Expand Down Expand Up @@ -1267,8 +1271,8 @@ static int live_suppress_wait_preempt(void *arg)
}

/* Disable NEWCLIENT promotion */
__i915_active_request_set(&i915_request_timeline(rq[i])->last_request,
dummy);
__i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
&dummy->fence);
i915_request_add(rq[i]);
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)

mutex_init(&timeline->mutex);

INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
INIT_LIST_HEAD(&timeline->requests);

i915_syncmap_init(&timeline->sync);
Expand Down
3 changes: 0 additions & 3 deletions drivers/gpu/drm/i915/gvt/scheduler.c
Original file line number Diff line number Diff line change
Expand Up @@ -385,11 +385,8 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_vgpu_submission *s = &vgpu->submission;
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct i915_request *rq;

lockdep_assert_held(&dev_priv->drm.struct_mutex);

if (workload->req)
return 0;

Expand Down
Loading

0 comments on commit b1e3177

Please sign in to comment.