
Add task affinity support with compute_scope and result_scope in Dagger.jl's @spawn macro #610

Status: Open. Wants to merge 2 commits into `master`.
1 change: 1 addition & 0 deletions docs/make.jl
@@ -20,6 +20,7 @@ makedocs(;
"Parallel Nested Loops" => "use-cases/parallel-nested-loops.md",
],
"Task Spawning" => "task-spawning.md",
"Task Affinity" => "task-affinity.md",
"Data Management" => "data-management.md",
"Distributed Arrays" => "darray.md",
"Streaming Tasks" => "streaming.md",
127 changes: 127 additions & 0 deletions docs/src/task-affinity.md
@@ -0,0 +1,127 @@
# Task Affinity

Dagger.jl's `@spawn` macro allows precise control over task execution and result accessibility through the `scope`, `compute_scope`, and `result_scope` options, which specify where a task may execute and where its result may be accessed. The scopes attached to any `Chunk` arguments of the task also factor into placement, as described below.
Not "chunk scopes", just "scopes". My point is that we can add language that mentions that chunk scopes (which are distinct from these 3 scope types) also factor in to task affinity. Maybe it's worth rewording to "Dagger's tasks are assigned to processors according to the combination of multiple scopes, namely scope, compute_scope, and result_scope (which can all be specified with @spawn), and additionally the scopes of any arguments to the task (in the form of a scope attached to a Chunk argument). Let's take a look at how these work together to direct task placement."


For more information on how these scopes work, see [Scopes](scopes.md#Scopes).
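
As a quick preview, here is a minimal sketch combining the first two keywords (assuming a hypothetical function `f` and three extra local workers); each keyword is explained in the sections below.

```julia
using Distributed
addprocs(3)                         # hypothetical setup: workers 2, 3 and 4
@everywhere using Dagger
@everywhere f(x, y) = x + y         # hypothetical task function, defined on every worker

# compute_scope takes precedence over scope for placement.
t = Dagger.@spawn scope=Dagger.scope(worker=2) compute_scope=Dagger.scope(worker=3) f(1, 2)
fetch(t)   # executes on worker 3 and returns 3; the result is accessible anywhere (AnyScope)
```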

---

## Key Terms

### Scope
`scope` defines the general set of locations where a Dagger task can execute. If `scope` is not explicitly set, the task runs within the `compute_scope`. If both `scope` and `compute_scope` are unspecified, the task falls back to `DefaultScope()`, allowing it to run wherever execution is possible. Execution occurs on any worker within the defined scope.
"wherever execution is possible by default (on CPUs, but not on GPUs)."


**Example:**
```julia
g = Dagger.@spawn scope=Dagger.scope(worker=3) f(x,y)
```
Task `g` executes only on worker 3. Its result can be accessed by any worker.

---

### Compute Scope
Like `scope`, `compute_scope` also specifies where a Dagger task can execute. The key difference is that if both `compute_scope` and `scope` are provided, `compute_scope` takes precedence over `scope` for execution placement. If neither is specified, they default to `DefaultScope()`.

**Example:**
```julia
g1 = Dagger.@spawn scope=Dagger.scope(worker=2,thread=3) compute_scope=Dagger.scope((worker=1, thread=2), (worker=3, thread=1)) f(x,y)
g2 = Dagger.@spawn compute_scope=Dagger.scope((worker=1, thread=2), (worker=3, thread=1)) f(x,y)
```
Tasks `g1` and `g2` execute on either thread 2 of worker 1 or thread 1 of worker 3. The `scope` argument to `g1` is ignored. Their results can be accessed by any worker.

---

### Result Scope

`result_scope` limits the locations from which a task's result can be accessed. This is useful for managing data locality and minimizing data movement. If `result_scope` is not specified, it defaults to `AnyScope()`, meaning the result can be accessed by any worker.

**Example:**
```julia
g = Dagger.@spawn result_scope=Dagger.scope(worker=3, threads=[1, 3, 4]) f(x,y)
```
The result of `g` is accessible only from threads 1, 3, and 4 of worker 3. Because no compute scope is given, execution is likewise restricted to those threads (the intersection of the default compute scope and `result_scope`).
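
One way to use such a restricted result without moving it is to place the consumer inside the same scope. A minimal sketch (assuming the task `g` above and that worker 3 exists):

```julia
# The consumer's scope intersects g's result_scope (worker 3, threads 1, 3 and 4),
# so the result can be consumed where it already lives.
consumer = Dagger.@spawn scope=Dagger.scope(worker=3) identity(g)
fetch(consumer)   # the consumer's own result uses the default AnyScope
```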

---

## Interaction of `compute_scope` and `result_scope`

When `scope`, `compute_scope`, and `result_scope` are all used, the scheduler executes the task on the intersection of the effective compute scope (`compute_scope` if provided, otherwise `scope`) and the `result_scope`. If the intersection is empty, the scheduler throws a `Dagger.Sch.SchedulingException`.

**Example:**
```julia
g = Dagger.@spawn scope=Dagger.scope(worker=3,thread=2) compute_scope=Dagger.scope(worker=2) result_scope=Dagger.scope((worker=2, thread=2), (worker=4, thread=2)) f(x,y)
```
The task `g` computes on thread 2 of worker 2 (as it's the intersection of compute and result scopes), and its result access is also restricted to thread 2 of worker 2.
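
For illustration, here is a sketch of the failure case (hypothetical scopes; the exact exception wrapping seen at `fetch` time may differ across Dagger versions):

```julia
# compute_scope (worker 1) and result_scope (worker 2) do not intersect,
# so the scheduler refuses to place the task.
bad = Dagger.@spawn compute_scope=Dagger.scope(worker=1) result_scope=Dagger.scope(worker=2) f(1, 2)
try
    fetch(bad)
catch err
    @show err   # expected to carry a Dagger.Sch.SchedulingException
end
```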

---

## Chunk Inputs to Tasks
> **Member comment:** This section needs to be adjusted so that it uses Dagger.scope and doesn't call Dagger.tochunk as the task's function, or it should be something like `Dagger.@spawn ... g(11, Dagger.tochunk(12, chunk_proc, chunk_scope))` (because the inner Dagger.tochunk function will be executed immediately).

> **Author comment (@AkhilAkkapelli, Jun 18, 2025):** to do..


This section explains how `scope`, `compute_scope`, and `result_scope` behave when the object being spawned is itself a `Chunk` (e.g. one created via `Dagger.tochunk(...)` or by calling `fetch(task; raw=true)` on a task).

Assume `g` is some function, e.g. `g(x, y) = x * 2 + y * 3`, `chunk_proc` is the chunk's processor, and `chunk_scope` is its defined accessibility.
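
For concreteness, the placeholders could be defined as follows (hypothetical values; any reachable worker and matching scope would work):

```julia
@everywhere g(x, y) = x * 2 + y * 3      # the example function, available on every worker
chunk_proc  = Dagger.OSProc(2)           # processor nominally associated with the chunk
chunk_scope = Dagger.scope(worker=2)     # where the chunk, and thus the task's result, is accessible
```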

When `Dagger.tochunk(...)` is directly spawned:
- The task executes on `chunk_proc`.
- The result is accessible only within `chunk_scope`.
- This behavior occurs irrespective of the `scope`, `compute_scope`, and `result_scope` values provided in the `@spawn` macro.
> **Member comment:** This isn't fully correct; the chunk scope also constrains both the compute and result scopes.

> **Author comment:** to do..

- Dagger validates that there is an intersection between the effective `compute_scope` (derived from `@spawn`'s `compute_scope` or `scope`) and the `result_scope`. If no intersection exists, the scheduler throws an exception.

!!! info
    While `chunk_proc` is currently required when constructing a chunk, it is largely unused in actual scheduling logic. It exists primarily for backward compatibility and may be deprecated in the future.

**Usage:**
```julia
h1 = Dagger.@spawn scope=Dagger.scope(worker=3) Dagger.tochunk(g(10, 11), chunk_proc, chunk_scope)
h2 = Dagger.@spawn compute_scope=Dagger.scope((worker=1, thread=2), (worker=3, thread=1)) Dagger.tochunk(g(20, 21), chunk_proc, chunk_scope)
h3 = Dagger.@spawn scope=Dagger.scope(worker=2,thread=3) compute_scope=Dagger.scope((worker=1, thread=2), (worker=3, thread=1)) Dagger.tochunk(g(30, 31), chunk_proc, chunk_scope)
h4 = Dagger.@spawn result_scope=Dagger.scope(worker=3) Dagger.tochunk(g(40, 41), chunk_proc, chunk_scope)
h5 = Dagger.@spawn scope=Dagger.scope(worker=3,thread=2) compute_scope=Dagger.ProcessScope(2) result_scope=Dagger.scope(worker=2,threads=[2,3]) Dagger.tochunk(g(50, 51), chunk_proc, chunk_scope)
```
In all these cases (`h1` through `h5`), the task executes on the chunk's processor `chunk_proc`, and its result is accessible only within `chunk_scope`.

---

## Function with Chunk Arguments as Tasks

This section details the behavior of `scope`, `compute_scope`, and `result_scope` when the spawned function takes one or more `Chunk`s as arguments.

Assume `g(x, y) = x * 2 + y * 3` is a function, and `arg = Dagger.tochunk(g(1, 2), arg_proc, arg_scope)` is a chunk argument, where `arg_proc` is the chunk's processor and `arg_scope` is its defined scope.
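
Hypothetically, the argument chunk might be constructed like this (mirroring the placeholders above; worker 2 is assumed to exist):

```julia
@everywhere g(x, y) = x * 2 + y * 3
arg_proc  = Dagger.OSProc(2)             # processor associated with the chunk
arg_scope = Dagger.scope(worker=2)       # the chunk is only accessible on worker 2
arg = Dagger.tochunk(g(1, 2), arg_proc, arg_scope)
```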

### Scope
Execution occurs on the intersection of `scope` and `arg_scope`. If `arg_scope` and `scope` do not intersect, the scheduler throws an exception.

```julia
h = Dagger.@spawn scope=Dagger.scope(worker=3) g(arg, 11)
```
Task `h` executes on any worker within the intersection of `scope` and `arg_scope`. The result is accessible from any worker.

---

### Compute scope and Chunk argument scopes interaction
If `arg_scope` and `compute_scope` do not intersect, the scheduler throws an exception. Otherwise, execution happens on the intersection of the effective compute scope (which will be `compute_scope` if provided, otherwise `scope`) and `arg_scope`. `result_scope` defaults to `AnyScope()`.

```julia
h1 = Dagger.@spawn compute_scope=Dagger.scope((worker=1, thread=2), (worker=3, thread=1)) g(arg, 11)
h2 = Dagger.@spawn scope=Dagger.scope(worker=2,thread=3) compute_scope=Dagger.scope((worker=1, thread=2), (worker=3, thread=1)) g(arg, 21)
```
Tasks `h1` and `h2` execute on any worker within the intersection of the `compute_scope` and `arg_scope`. `scope` is ignored if `compute_scope` is specified. The result is stored and accessible from anywhere.

---

### Result scope and Chunk argument scopes interaction
If only `result_scope` is specified, computation happens on any worker within `arg_scope`, and the result is only accessible from `result_scope`.

```julia
h = Dagger.@spawn result_scope=Dagger.scope(worker=3) g(arg, 11)
```
Task `h` executes on any worker within `arg_scope`. The result is accessible from `result_scope`.

---

### Compute, result, and chunk argument scopes interaction
When `scope`, `compute_scope`, and `result_scope` are all used, the scheduler executes the task on the intersection of `arg_scope`, the effective compute scope (which is `compute_scope` if provided, otherwise `scope`), and `result_scope`. If no intersection exists, the scheduler throws an exception.

```julia
h = Dagger.@spawn scope=Dagger.scope(worker=3,thread=2) compute_scope=Dagger.ProcessScope(2) result_scope=Dagger.scope((worker=2, thread=2), (worker=4, thread=2)) g(arg, 31)
```
Task `h` computes on thread 2 of worker 2 (the intersection of `arg_scope`, the effective compute scope, and `result_scope`), and its result access is also restricted to thread 2 of worker 2.
39 changes: 24 additions & 15 deletions src/sch/Sch.jl
@@ -14,7 +14,7 @@ import Random: randperm
import Base: @invokelatest

import ..Dagger
import ..Dagger: Context, Processor, Thunk, WeakThunk, ThunkFuture, DTaskFailedException, Chunk, WeakChunk, OSProc, AnyScope, DefaultScope, LockedObject
import ..Dagger: Context, Processor, Thunk, WeakThunk, ThunkFuture, DTaskFailedException, Chunk, WeakChunk, OSProc, AnyScope, DefaultScope, InvalidScope, LockedObject
import ..Dagger: order, dependents, noffspring, istask, inputs, unwrap_weak_checked, affinity, tochunk, timespan_start, timespan_finish, procs, move, chunktype, processor, get_processors, get_parent, execute!, rmprocs!, task_processor, constrain, cputhreadtime
import ..Dagger: @dagdebug, @safe_lock_spin1
import DataStructures: PriorityQueue, enqueue!, dequeue_pair!, peek
@@ -726,16 +726,25 @@ function schedule!(ctx, state, procs=procs_to_use(ctx))
sig = signature(state, task)

# Calculate scope
scope = if task.f isa Chunk
task.f.scope
else
if task.options.proclist !== nothing
# proclist overrides scope selection
AnyScope()
else
DefaultScope()
scope = constrain(task.compute_scope, task.result_scope)
if scope isa InvalidScope
ex = SchedulingException("compute_scope and result_scope are not compatible: $(scope.x), $(scope.y)")
state.cache[task] = ex
state.errored[task] = true
set_failed!(state, task)
@goto pop_task
end
if task.f isa Chunk
scope = constrain(scope, task.f.scope)
if scope isa InvalidScope
ex = SchedulingException("Current scope and function Chunk Scope are not compatible: $(scope.x), $(scope.y)")
state.cache[task] = ex
state.errored[task] = true
set_failed!(state, task)
@goto pop_task
end
end

for (_,input) in task.inputs
input = unwrap_weak_checked(input)
chunk = if istask(input)
@@ -747,8 +756,8 @@
end
chunk isa Chunk || continue
scope = constrain(scope, chunk.scope)
if scope isa Dagger.InvalidScope
ex = SchedulingException("Scopes are not compatible: $(scope.x), $(scope.y)")
if scope isa InvalidScope
ex = SchedulingException("Current scope and argument Chunk scope are not compatible: $(scope.x), $(scope.y)")
state.cache[task] = ex
state.errored[task] = true
set_failed!(state, task)
@@ -1086,7 +1095,7 @@ function fire_tasks!(ctx, thunks::Vector{<:Tuple}, (gproc, proc), state)
thunk.get_result, thunk.persist, thunk.cache, thunk.meta, options,
propagated, ids, positions,
(log_sink=ctx.log_sink, profile=ctx.profile),
sch_handle, state.uid])
sch_handle, state.uid, thunk.result_scope])
end
# N.B. We don't batch these because we might get a deserialization
# error due to something not being defined on the worker, and then we don't
@@ -1305,7 +1314,7 @@ function start_processor_runner!(istate::ProcessorInternalState, uid::UInt64, re
task = task_spec[]
scope = task[5]
if !isa(constrain(scope, Dagger.ExactScope(to_proc)),
Dagger.InvalidScope) &&
InvalidScope) &&
typemax(UInt32) - proc_occupancy_cached >= occupancy
# Compatible, steal this task
return dequeue_pair!(queue)
@@ -1488,7 +1497,7 @@ function do_task(to_proc, task_desc)
scope, Tf, data,
send_result, persist, cache, meta,
options, propagated, ids, positions,
ctx_vars, sch_handle, sch_uid = task_desc
ctx_vars, sch_handle, sch_uid, result_scope = task_desc
ctx = Context(Processor[]; log_sink=ctx_vars.log_sink, profile=ctx_vars.profile)

from_proc = OSProc()
@@ -1696,7 +1705,7 @@

# Construct result
# TODO: We should cache this locally
send_result || meta ? res : tochunk(res, to_proc; device, persist, cache=persist ? true : cache,
send_result || meta ? res : tochunk(res, to_proc, result_scope; device, persist, cache=persist ? true : cache,
tag=options.storage_root_tag,
leaf_tag=something(options.storage_leaf_tag, MemPool.Tag()),
retain=options.storage_retain)
25 changes: 8 additions & 17 deletions src/thunk.jl
@@ -73,6 +73,8 @@ mutable struct Thunk
eager_ref::Union{DRef,Nothing}
options::Any # stores scheduler-specific options
propagates::Tuple # which options we'll propagate
compute_scope::AbstractScope
result_scope::AbstractScope
function Thunk(f, xs...;
syncdeps=nothing,
id::Int=next_id(),
@@ -84,16 +86,14 @@
affinity=nothing,
eager_ref=nothing,
processor=nothing,
scope=nothing,
scope=DefaultScope(),
compute_scope=scope,
result_scope=AnyScope(),
options=nothing,
propagates=(),
kwargs...
)
if !isa(f, Chunk) && (!isnothing(processor) || !isnothing(scope))
f = tochunk(f,
something(processor, OSProc()),
something(scope, DefaultScope()))
end

xs = Base.mapany(identity, xs)
syncdeps_set = Set{Any}(filterany(is_task_or_chunk, Base.mapany(last, xs)))
if syncdeps !== nothing
@@ -105,11 +105,11 @@
if options !== nothing
@assert isempty(kwargs)
new(f, xs, syncdeps_set, id, get_result, meta, persist, cache,
cache_ref, affinity, eager_ref, options, propagates)
cache_ref, affinity, eager_ref, options, propagates, compute_scope, result_scope)
else
new(f, xs, syncdeps_set, id, get_result, meta, persist, cache,
cache_ref, affinity, eager_ref, Sch.ThunkOptions(;kwargs...),
propagates)
propagates, compute_scope, result_scope)
end
end
end
@@ -476,15 +476,6 @@ function spawn(f, args...; kwargs...)
args = args[2:end]
end

# Wrap f in a Chunk if necessary
processor = haskey(options, :processor) ? options.processor : nothing
scope = haskey(options, :scope) ? options.scope : nothing
if !isnothing(processor) || !isnothing(scope)
f = tochunk(f,
something(processor, get_options(:processor, OSProc())),
something(scope, get_options(:scope, DefaultScope())))
end

# Process the args and kwargs into Pair form
args_kwargs = args_kwargs_to_pairs(args, kwargs)

1 change: 1 addition & 0 deletions test/runtests.jl
@@ -9,6 +9,7 @@ tests = [
("Options", "options.jl"),
("Mutation", "mutation.jl"),
("Task Queues", "task-queues.jl"),
("Task Affinity", "task-affinity.jl"),
("Datadeps", "datadeps.jl"),
("Streaming", "streaming.jl"),
("Domain Utilities", "domain.jl"),