Skip to content

Commit 931eee8

Browse files
authored
refactor(turbo-tasks): Use an execution id instead of the parent task id to prevent local Vc escapes (#78487)
Re-using the task id just for the `Vc` scope escape check isn't ideal: - A task can execute multiple times. Just using task ids doesn't guarantee that the `Vc` hasn't leaked across multiple executions of the same task. - This is just a fallback check to help with debugging, so allocating 32 bits to this id seems wasteful. We can do a good job with 16 bits. Nothing else really needs the task id, so we can get rid of it later in this PR stack. A version of this idea was part of the original local tasks idea: https://www.notion.so/vercel/Resolved-Vcs-Vc-Lifetimes-Local-Vcs-and-Vc-Refcounts-49d666d3f9594017b5b312b87ddc5bff?pvs=4#da95dbdc390144a9bb1b3d2fb73867b7 Even with this extra 4 bits, the overall enum size is still 16 bits due to alignment and the size of the other enum variants, so this shouldn't regress memory consumption. The next PR (#78561) removes `TaskId` entirely...
1 parent 0eb78fd commit 931eee8

File tree

7 files changed

+111
-49
lines changed

7 files changed

+111
-49
lines changed

turbopack/crates/turbo-tasks-testing/src/lib.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ use turbo_tasks::{
2020
registry,
2121
test_helpers::with_turbo_tasks_for_testing,
2222
util::{SharedError, StaticOrArc},
23-
CellId, InvalidationReason, LocalTaskId, MagicAny, RawVc, ReadCellOptions, ReadConsistency,
24-
TaskId, TaskPersistence, TraitTypeId, TurboTasksApi, TurboTasksCallApi,
23+
CellId, ExecutionId, InvalidationReason, LocalTaskId, MagicAny, RawVc, ReadCellOptions,
24+
ReadConsistency, TaskId, TaskPersistence, TraitTypeId, TurboTasksApi, TurboTasksCallApi,
2525
};
2626

2727
pub use crate::run::{run, run_with_tt, run_without_cache_check, Registration};
@@ -57,9 +57,11 @@ impl VcStorage {
5757
i
5858
};
5959
let task_id = TaskId::try_from(u32::try_from(i + 1).unwrap()).unwrap();
60+
let execution_id = ExecutionId::try_from(u16::try_from(i + 1).unwrap()).unwrap();
6061
handle.spawn(with_turbo_tasks_for_testing(
6162
this.clone(),
6263
task_id,
64+
execution_id,
6365
async move {
6466
let result = AssertUnwindSafe(future).catch_unwind().await;
6567

@@ -230,7 +232,7 @@ impl TurboTasksApi for VcStorage {
230232

231233
fn try_read_local_output(
232234
&self,
233-
_parent_task_id: TaskId,
235+
_execution_id: ExecutionId,
234236
_local_task_id: LocalTaskId,
235237
) -> Result<Result<RawVc, EventListener>> {
236238
unimplemented!()
@@ -322,6 +324,7 @@ impl VcStorage {
322324
..Default::default()
323325
}),
324326
TaskId::MAX,
327+
ExecutionId::MIN,
325328
f,
326329
)
327330
}

turbopack/crates/turbo-tasks/src/id.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,13 @@ define_id!(
125125
serde(transparent),
126126
doc = "Represents the nth `local` function call inside a task.",
127127
);
128+
define_id!(
129+
ExecutionId: u16,
130+
derive(Debug, Serialize, Deserialize),
131+
serde(transparent),
132+
doc = "An identifier for a specific task execution. Used to assert that local `Vc`s don't \
133+
leak. This value may overflow and re-use old values.",
134+
);
128135

129136
impl Debug for TaskId {
130137
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {

turbopack/crates/turbo-tasks/src/id_factory.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,36 @@ where
9696
),
9797
}
9898
}
99+
100+
/// Returns an id, potentially allowing an overflow. This may cause ids to be silently re-used.
101+
/// Used for [`crate::id::ExecutionId`].
102+
///
103+
/// If id re-use is desired only for "freed" ids, use [`IdFactoryWithReuse`] instead.
104+
pub fn wrapping_get(&self) -> T {
105+
let count = self.counter.fetch_add(1, Ordering::Relaxed);
106+
107+
let new_id_u64 = (count % self.max_count) + self.id_offset;
108+
// Safety:
109+
// - `id_offset` is a non-zero value.
110+
// - `id_offset + max_count < u64::MAX`.
111+
let new_id = unsafe { NonZeroU64::new_unchecked(new_id_u64) };
112+
113+
match new_id.try_into() {
114+
Ok(id) => id,
115+
Err(_) => panic!(
116+
"Failed to convert NonZeroU64 value of {} into {}",
117+
new_id,
118+
type_name::<T>()
119+
),
120+
}
121+
}
99122
}
100123

101124
/// An [`IdFactory`], but extended with a free list to allow for id reuse for ids such as
102125
/// [`BackendJobId`][crate::backend::BackendJobId].
126+
///
127+
/// If silent untracked re-use of ids is okay, consider using the cheaper
128+
/// [`IdFactory::wrapping_get`] method.
103129
pub struct IdFactoryWithReuse<T> {
104130
factory: IdFactory<T>,
105131
free_ids: ConcurrentQueue<T>,

turbopack/crates/turbo-tasks/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ pub use completion::{Completion, Completions};
9191
pub use display::ValueToString;
9292
pub use effect::{apply_effects, effect, get_effects, Effects};
9393
pub use id::{
94-
FunctionId, LocalTaskId, SessionId, TaskId, TraitTypeId, ValueTypeId, TRANSIENT_TASK_BIT,
94+
ExecutionId, FunctionId, LocalTaskId, SessionId, TaskId, TraitTypeId, ValueTypeId,
95+
TRANSIENT_TASK_BIT,
9596
};
9697
pub use invalidation::{
9798
get_invalidator, DynamicEqHash, InvalidationReason, InvalidationReasonKind,

turbopack/crates/turbo-tasks/src/manager.rs

Lines changed: 56 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use crate::{
3131
},
3232
capture_future::{self, CaptureFuture},
3333
event::{Event, EventListener},
34-
id::{BackendJobId, FunctionId, LocalTaskId, TraitTypeId, TRANSIENT_TASK_BIT},
34+
id::{BackendJobId, ExecutionId, FunctionId, LocalTaskId, TraitTypeId, TRANSIENT_TASK_BIT},
3535
id_factory::IdFactoryWithReuse,
3636
magic_any::MagicAny,
3737
raw_vc::{CellId, RawVc},
@@ -41,10 +41,11 @@ use crate::{
4141
task_statistics::TaskStatisticsApi,
4242
trace::TraceRawVcs,
4343
trait_helpers::get_trait_method,
44-
util::StaticOrArc,
44+
util::{IdFactory, StaticOrArc},
4545
vc::ReadVcFuture,
46-
Completion, InvalidationReason, InvalidationReasonSet, ReadCellOptions, ResolvedVc,
47-
SharedReference, TaskId, TaskIdSet, ValueTypeId, Vc, VcRead, VcValueTrait, VcValueType,
46+
Completion, InvalidationReason, InvalidationReasonSet, OutputContent, ReadCellOptions,
47+
ResolvedVc, SharedReference, TaskId, TaskIdSet, ValueTypeId, Vc, VcRead, VcValueTrait,
48+
VcValueType,
4849
};
4950

5051
pub trait TurboTasksCallApi: Sync + Send {
@@ -139,7 +140,7 @@ pub trait TurboTasksApi: TurboTasksCallApi + Sync + Send {
139140
/// `OperationVc`s, which should never be local tasks.
140141
fn try_read_local_output(
141142
&self,
142-
parent_task_id: TaskId,
143+
execution_id: ExecutionId,
143144
local_task_id: LocalTaskId,
144145
) -> Result<Result<RawVc, EventListener>>;
145146

@@ -347,6 +348,7 @@ pub struct TurboTasks<B: Backend + 'static> {
347348
backend: B,
348349
task_id_factory: IdFactoryWithReuse<TaskId>,
349350
transient_task_id_factory: IdFactoryWithReuse<TaskId>,
351+
execution_id_factory: IdFactory<ExecutionId>,
350352
stopped: AtomicBool,
351353
currently_scheduled_tasks: AtomicUsize,
352354
currently_scheduled_foreground_jobs: AtomicUsize,
@@ -371,6 +373,7 @@ pub struct TurboTasks<B: Backend + 'static> {
371373
/// - The backend is aware of.
372374
struct CurrentTaskState {
373375
task_id: TaskId,
376+
execution_id: ExecutionId,
374377

375378
/// Affected tasks, that are tracked during task execution. These tasks will
376379
/// be invalidated when the execution finishes or before reading a cell
@@ -397,9 +400,14 @@ struct CurrentTaskState {
397400
}
398401

399402
impl CurrentTaskState {
400-
fn new(task_id: TaskId, backend_state: Box<dyn Any + Send + Sync>) -> Self {
403+
fn new(
404+
task_id: TaskId,
405+
execution_id: ExecutionId,
406+
backend_state: Box<dyn Any + Send + Sync>,
407+
) -> Self {
401408
Self {
402409
task_id,
410+
execution_id,
403411
tasks_to_notify: Vec::new(),
404412
stateful: false,
405413
cell_counters: Some(AutoMap::default()),
@@ -409,10 +417,11 @@ impl CurrentTaskState {
409417
}
410418
}
411419

412-
fn assert_task_id(&self, expected_task_id: TaskId) {
413-
if self.task_id != expected_task_id {
414-
unimplemented!(
415-
"Local tasks can currently only be scheduled/awaited within their parent task"
420+
fn assert_execution_id(&self, expected_execution_id: ExecutionId) {
421+
if self.execution_id != expected_execution_id {
422+
panic!(
423+
"Local tasks can only be scheduled/awaited within the same execution of the \
424+
parent task that created them"
416425
);
417426
}
418427
}
@@ -458,11 +467,13 @@ impl<B: Backend + 'static> TurboTasks<B> {
458467
);
459468
let transient_task_id_factory =
460469
IdFactoryWithReuse::new(TaskId::try_from(TRANSIENT_TASK_BIT).unwrap(), TaskId::MAX);
470+
let execution_id_factory = IdFactory::new(ExecutionId::MIN, ExecutionId::MAX);
461471
let this = Arc::new_cyclic(|this| Self {
462472
this: this.clone(),
463473
backend,
464474
task_id_factory,
465475
transient_task_id_factory,
476+
execution_id_factory,
466477
stopped: AtomicBool::new(false),
467478
currently_scheduled_tasks: AtomicUsize::new(0),
468479
currently_scheduled_background_jobs: AtomicUsize::new(0),
@@ -644,8 +655,11 @@ impl<B: Backend + 'static> TurboTasks<B> {
644655
let mut schedule_again = true;
645656
while schedule_again {
646657
let backend_state = this.backend.new_task_state(task_id);
658+
// it's okay for execution ids to overflow and wrap, they're just used for an assert
659+
let execution_id = this.execution_id_factory.wrapping_get();
647660
let current_task_state = Arc::new(RwLock::new(CurrentTaskState::new(
648661
task_id,
662+
execution_id,
649663
Box::new(backend_state),
650664
)));
651665
let single_execution_future = async {
@@ -722,22 +736,31 @@ impl<B: Backend + 'static> TurboTasks<B> {
722736
&self,
723737
ty: LocalTaskType,
724738
// if this is a `LocalTaskType::Resolve*`, we may spawn another task with this persistence,
725-
// if this is a `LocalTaskType::Native`, persistence is unused (there's no caching).
739+
// if this is a `LocalTaskType::Native`, persistence is unused.
740+
//
741+
// TODO: In the rare case that we're crossing a transient->persistent boundary, we should
742+
// force `LocalTaskType::Native` to be spawned as real tasks, so that any cells they create
743+
// have the correct persistence. This is not an issue for resolution stub task, as they
744+
// don't end up owning any cells.
726745
persistence: TaskPersistence,
727746
) -> RawVc {
728-
use crate::OutputContent;
729-
730747
let ty = Arc::new(ty);
731-
let (global_task_state, local_task_id, parent_task_id) = CURRENT_TASK_STATE.with(|gts| {
732-
let mut gts_write = gts.write().unwrap();
733-
let local_task_id = gts_write.create_local_task(LocalTask::Scheduled {
734-
done_event: Event::new({
735-
let ty = Arc::clone(&ty);
736-
move || format!("LocalTask({})::done_event", ty)
737-
}),
748+
let (global_task_state, parent_task_id, execution_id, local_task_id) = CURRENT_TASK_STATE
749+
.with(|gts| {
750+
let mut gts_write = gts.write().unwrap();
751+
let local_task_id = gts_write.create_local_task(LocalTask::Scheduled {
752+
done_event: Event::new({
753+
let ty = Arc::clone(&ty);
754+
move || format!("LocalTask({})::done_event", ty)
755+
}),
756+
});
757+
(
758+
Arc::clone(gts),
759+
gts_write.task_id,
760+
gts_write.execution_id,
761+
local_task_id,
762+
)
738763
});
739-
(Arc::clone(gts), local_task_id, gts_write.task_id)
740-
});
741764

742765
#[cfg(feature = "tokio_tracing")]
743766
let description = format!(
@@ -799,7 +822,7 @@ impl<B: Backend + 'static> TurboTasks<B> {
799822
#[cfg(not(feature = "tokio_tracing"))]
800823
tokio::task::spawn(future);
801824

802-
RawVc::LocalOutput(parent_task_id, persistence, local_task_id)
825+
RawVc::LocalOutput(parent_task_id, persistence, execution_id, local_task_id)
803826
}
804827

805828
fn begin_primary_job(&self) {
@@ -1277,17 +1300,17 @@ impl<B: Backend + 'static> TurboTasksApi for TurboTasks<B> {
12771300

12781301
fn try_read_local_output(
12791302
&self,
1280-
parent_task_id: TaskId,
1303+
execution_id: ExecutionId,
12811304
local_task_id: LocalTaskId,
12821305
) -> Result<Result<RawVc, EventListener>> {
12831306
CURRENT_TASK_STATE.with(|gts| {
12841307
let gts_read = gts.read().unwrap();
12851308

1286-
// Local Vcs are local to their parent task, and do not exist outside of it. This is
1287-
// weakly enforced at compile time using the `NonLocalValue` marker trait. This
1288-
// assertion exists to handle any potential escapes that the compile-time checks cannot
1289-
// capture.
1290-
gts_read.assert_task_id(parent_task_id);
1309+
// Local Vcs are local to their parent task's current execution, and do not exist
1310+
// outside of it. This is weakly enforced at compile time using the `NonLocalValue`
1311+
// marker trait. This assertion exists to handle any potential escapes that the
1312+
// compile-time checks cannot capture.
1313+
gts_read.assert_execution_id(execution_id);
12911314

12921315
match gts_read.get_local_task(local_task_id) {
12931316
LocalTask::Scheduled { done_event } => Ok(Err(done_event.listen())),
@@ -1632,13 +1655,15 @@ pub fn turbo_tasks_future_scope<T>(
16321655
pub fn with_turbo_tasks_for_testing<T>(
16331656
tt: Arc<dyn TurboTasksApi>,
16341657
current_task: TaskId,
1658+
execution_id: ExecutionId,
16351659
f: impl Future<Output = T>,
16361660
) -> impl Future<Output = T> {
16371661
TURBO_TASKS.scope(
16381662
tt,
16391663
CURRENT_TASK_STATE.scope(
16401664
Arc::new(RwLock::new(CurrentTaskState::new(
16411665
current_task,
1666+
execution_id,
16421667
Box::new(()),
16431668
))),
16441669
f,
@@ -1969,11 +1994,11 @@ pub fn find_cell_by_type(ty: ValueTypeId) -> CurrentCellRef {
19691994

19701995
pub(crate) async fn read_local_output(
19711996
this: &dyn TurboTasksApi,
1972-
parent_task_id: TaskId,
1997+
execution_id: ExecutionId,
19731998
local_task_id: LocalTaskId,
19741999
) -> Result<RawVc> {
19752000
loop {
1976-
match this.try_read_local_output(parent_task_id, local_task_id)? {
2001+
match this.try_read_local_output(execution_id, local_task_id)? {
19772002
Ok(raw_vc) => return Ok(raw_vc),
19782003
Err(event_listener) => event_listener.await,
19792004
}

turbopack/crates/turbo-tasks/src/raw_vc.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use thiserror::Error;
88
use crate::{
99
backend::{CellContent, TypedCellContent},
1010
event::EventListener,
11-
id::LocalTaskId,
11+
id::{ExecutionId, LocalTaskId},
1212
manager::{read_local_output, read_task_cell, read_task_output, with_turbo_tasks},
1313
registry::{self, get_value_type},
1414
turbo_tasks, CollectiblesSource, ReadCellOptions, ReadConsistency, ResolvedVc, TaskId,
@@ -57,7 +57,7 @@ impl Display for CellId {
5757
pub enum RawVc {
5858
TaskOutput(TaskId),
5959
TaskCell(TaskId, CellId),
60-
LocalOutput(TaskId, TaskPersistence, LocalTaskId),
60+
LocalOutput(TaskId, TaskPersistence, ExecutionId, LocalTaskId),
6161
}
6262

6363
impl RawVc {
@@ -148,8 +148,8 @@ impl RawVc {
148148
return Err(ResolveTypeError::NoContent);
149149
}
150150
}
151-
RawVc::LocalOutput(task_id, _persistence, local_task_id) => {
152-
current = read_local_output(&*tt, task_id, local_task_id)
151+
RawVc::LocalOutput(_task_id, _persistence, execution_id, local_task_id) => {
152+
current = read_local_output(&*tt, execution_id, local_task_id)
153153
.await
154154
.map_err(|source| ResolveTypeError::TaskError { source })?;
155155
}
@@ -185,9 +185,9 @@ impl RawVc {
185185
consistency = ReadConsistency::Eventual;
186186
}
187187
RawVc::TaskCell(_, _) => return Ok(current),
188-
RawVc::LocalOutput(task_id, _persistence, local_task_id) => {
188+
RawVc::LocalOutput(_task_id, _persistence, execution_id, local_task_id) => {
189189
debug_assert_eq!(consistency, ReadConsistency::Eventual);
190-
current = read_local_output(&*tt, task_id, local_task_id).await?;
190+
current = read_local_output(&*tt, execution_id, local_task_id).await?;
191191
}
192192
}
193193
}
@@ -200,8 +200,8 @@ impl RawVc {
200200
let mut current = self;
201201
loop {
202202
match current {
203-
RawVc::LocalOutput(task_id, _persistence, local_task_id) => {
204-
current = read_local_output(&*tt, task_id, local_task_id).await?;
203+
RawVc::LocalOutput(_task_id, _persistence, execution_id, local_task_id) => {
204+
current = read_local_output(&*tt, execution_id, local_task_id).await?;
205205
}
206206
non_local => return Ok(non_local),
207207
}
@@ -215,14 +215,14 @@ impl RawVc {
215215

216216
pub fn get_task_id(&self) -> TaskId {
217217
match self {
218-
RawVc::TaskOutput(t) | RawVc::TaskCell(t, _) | RawVc::LocalOutput(t, ..) => *t,
218+
RawVc::TaskOutput(t) | RawVc::TaskCell(t, ..) | RawVc::LocalOutput(t, ..) => *t,
219219
}
220220
}
221221

222222
pub fn try_get_type_id(&self) -> Option<ValueTypeId> {
223223
match self {
224224
RawVc::TaskCell(_, CellId { type_id, .. }) => Some(*type_id),
225-
RawVc::TaskOutput(_) | RawVc::LocalOutput(..) => None,
225+
RawVc::TaskOutput(..) | RawVc::LocalOutput(..) => None,
226226
}
227227
}
228228

@@ -360,9 +360,9 @@ impl Future for ReadRawVcFuture {
360360
Err(err) => return Poll::Ready(Err(err)),
361361
}
362362
}
363-
RawVc::LocalOutput(task_id, _persistence, local_output_id) => {
363+
RawVc::LocalOutput(_task_id, _persistence, execution_id, local_output_id) => {
364364
debug_assert_eq!(this.consistency, ReadConsistency::Eventual);
365-
let read_result = tt.try_read_local_output(task_id, local_output_id);
365+
let read_result = tt.try_read_local_output(execution_id, local_output_id);
366366
match read_result {
367367
Ok(Ok(vc)) => {
368368
this.current = vc;

0 commit comments

Comments
 (0)