Fix SMP races with thread swap and abort #21903

Merged · 7 commits · Jan 21, 2020
7 changes: 7 additions & 0 deletions arch/x86/core/intel64/locore.S
@@ -220,6 +220,13 @@ z_x86_switch:
movq $X86_KERNEL_CS, _thread_offset_to_cs(%rsi)
movq $X86_KERNEL_DS, _thread_offset_to_ss(%rsi)
#endif
/* Store the handle (i.e. our thread struct address) into the
* switch handle field; this is a synchronization signal that
* must occur after the last data from the old context is
* saved.
*/
movq %rsi, ___thread_t_switch_handle_OFFSET(%rsi)

movq %gs:__x86_tss64_t_ist1_OFFSET, %rsp

/* fall through to __resume */
1 change: 0 additions & 1 deletion boards/x86/qemu_x86/qemu_x86_64_defconfig
@@ -17,4 +17,3 @@ CONFIG_SMP=y
CONFIG_MP_NUM_CPUS=2
CONFIG_X86_MMU=y
CONFIG_X86_VERY_EARLY_CONSOLE=y
CONFIG_MP_NUM_CPUS=1
5 changes: 4 additions & 1 deletion include/kernel_structs.h
@@ -59,8 +59,11 @@
/* Thread is being aborted (SMP only) */
#define _THREAD_ABORTING (BIT(5))

/* Thread was aborted in interrupt context (SMP only) */
#define _THREAD_ABORTED_IN_ISR (BIT(6))

/* Thread is present in the ready queue */
#define _THREAD_QUEUED (BIT(6))
#define _THREAD_QUEUED (BIT(7))

/* end - states */

31 changes: 25 additions & 6 deletions kernel/include/kernel_arch_interface.h
@@ -89,12 +89,31 @@ void arch_new_thread(struct k_thread *thread, k_thread_stack_t *pStack,
* 3) Set the stack pointer to the value provided in switch_to
* 4) Pop off all thread state from the stack we switched to and return.
*
* Some arches may implement thread->switch handle as a pointer to the thread
* itself, and save context somewhere in thread->arch. In this case, on initial
* context switch from the dummy thread, thread->switch handle for the outgoing
* thread is NULL. Instead of dereferencing switched_from all the way to get
* the thread pointer, subtract ___thread_t_switch_handle_OFFSET to obtain the
* thread pointer instead.
* Some arches may implement thread->switch_handle as a pointer to the
* thread itself, and save context somewhere in thread->arch. In this
* case, on initial context switch from the dummy thread,
* thread->switch_handle for the outgoing thread is NULL. Instead of
* dereferencing switched_from all the way to get the thread pointer,
* subtract ___thread_t_switch_handle_OFFSET to obtain the thread
* pointer instead. That is, such a scheme would have behavior like
* (in C pseudocode):
*
* void arch_switch(void *switch_to, void **switched_from)
* {
* struct k_thread *new = switch_to;
* struct k_thread *old = CONTAINER_OF(switched_from, struct k_thread,
* switch_handle);
*
* // save old context...
* *switched_from = old;
* // restore new context...
* }
*
* Note that, regardless of the underlying handle representation, the
* incoming switched_from pointer MUST be written through with a
* non-NULL value after all relevant thread state has been saved. The
* kernel uses this as a synchronization signal to be able to wait for
* switch completion from another CPU.
*
* @param switch_to Incoming thread's switch handle
* @param switched_from Pointer to outgoing thread's switch handle storage
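To make the arch_switch() contract above concrete, here is a minimal C sketch that follows it; my_save_context() and my_restore_context() are hypothetical placeholder names for an architecture's real save/restore code, not Zephyr APIs. The locore.S hunk earlier in this diff performs the same handle store for x86-64 in assembly.

```c
#include <kernel.h>

/* Sketch only: an arch_switch() honoring the documented contract.
 * The two my_*_context() helpers are hypothetical placeholders.
 */
void arch_switch(void *switch_to, void **switched_from)
{
	struct k_thread *new = switch_to;
	struct k_thread *old = CONTAINER_OF(switched_from, struct k_thread,
					    switch_handle);

	/* 1) Save every piece of the outgoing thread's state first. */
	my_save_context(&old->arch);

	/* 2) Only then publish the handle: from this point on, another
	 *    CPU may legally pick "old" up and run it.
	 */
	*switched_from = old;

	/* 3) Restore the incoming thread; control never returns here. */
	my_restore_context(&new->arch);
}
```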
38 changes: 38 additions & 0 deletions kernel/include/kswap.h
@@ -29,6 +29,26 @@ void z_smp_release_global_lock(struct k_thread *thread);
/* context switching and scheduling-related routines */
#ifdef CONFIG_USE_SWITCH

/* There is an unavoidable SMP race when threads swap -- their thread
* record is in the queue (and visible to other CPUs) before
* arch_switch() finishes saving state. We must spin for the switch
* handle before entering a new thread. See docs on arch_switch().
*
* Note: future SMP architectures may need a fence/barrier or cache
* invalidation here. Current ones don't, and sadly Zephyr doesn't
* have a framework for that yet.
*/
static inline void wait_for_switch(struct k_thread *thread)
{
#ifdef CONFIG_SMP
volatile void **shp = (void *)&thread->switch_handle;

while (*shp == NULL) {
k_busy_wait(1);
}
#endif
}

/* New style context switching. arch_switch() is a lower level
* primitive that doesn't know about the scheduler or return value.
* Needed for SMP, where the scheduler requires spinlocking that we
@@ -57,8 +77,25 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
k_spin_release(lock);
}

#ifdef CONFIG_SMP
/* Null out the switch handle, see wait_for_switch() above.
* Note that we set it back to a non-null value if we are not
* switching! The value itself doesn't matter, because by
* definition _current is running and has no saved state.
*/
volatile void **shp = (void *)&old_thread->switch_handle;

*shp = NULL;
#endif

new_thread = z_get_next_ready_thread();

#ifdef CONFIG_SMP
if (new_thread == old_thread) {
*shp = old_thread;
}
#endif

if (new_thread != old_thread) {
sys_trace_thread_switched_out();
#ifdef CONFIG_TIMESLICING
Expand All @@ -77,6 +114,7 @@ static ALWAYS_INLINE unsigned int do_swap(unsigned int key,
}
#endif
_current = new_thread;
wait_for_switch(new_thread);
arch_switch(new_thread->switch_handle,
&old_thread->switch_handle);

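For the "future SMP architectures may need a fence/barrier" note above, a sketch of where the barriers would pair up on a hypothetical weakly-ordered port, using C11 atomics; neither helper exists in the tree, and the SMP targets Zephyr supports today don't need them.

```c
#include <stdatomic.h>
#include <kernel.h>

/* Outgoing side: called after the very last write of saved context. */
static inline void publish_switch_handle(struct k_thread *old)
{
	/* Release: make all prior writes (the saved registers/stack)
	 * visible to other CPUs before the handle becomes non-NULL.
	 */
	atomic_thread_fence(memory_order_release);
	old->switch_handle = old;
}

/* Incoming side: a barrier-aware variant of wait_for_switch() above. */
static inline void wait_for_switch_weak(struct k_thread *thread)
{
	volatile void **shp = (void *)&thread->switch_handle;

	while (*shp == NULL) {
		k_busy_wait(1);
	}
	/* Acquire: pairs with the release fence, so the saved context
	 * is read only after the handle was observed non-NULL.
	 */
	atomic_thread_fence(memory_order_acquire);
}
```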
2 changes: 1 addition & 1 deletion kernel/queue.c
@@ -102,8 +102,8 @@ static inline void z_vrfy_k_queue_init(struct k_queue *queue)
#if !defined(CONFIG_POLL)
static void prepare_thread_to_run(struct k_thread *thread, void *data)
{
z_ready_thread(thread);
z_thread_return_value_set_with_data(thread, 0, data);
z_ready_thread(thread);
}
#endif /* CONFIG_POLL */

50 changes: 41 additions & 9 deletions kernel/sched.c
@@ -15,6 +15,13 @@
#include <stdbool.h>
#include <kernel_internal.h>

/* Maximum time between the time a self-aborting thread flags itself
* DEAD and the last read or write to its stack memory (i.e. the time
* of its next swap()). In theory this might be tuned per platform,
* but in practice this conservative value should be safe.
*/
#define THREAD_ABORT_DELAY_US 500

#if defined(CONFIG_SCHED_DUMB)
#define _priq_run_add z_priq_dumb_add
#define _priq_run_remove z_priq_dumb_remove
@@ -436,7 +443,20 @@ void z_thread_single_abort(struct k_thread *thread)
thread->base.pended_on = NULL;
}
}
thread->base.thread_state |= _THREAD_DEAD;

u32_t mask = _THREAD_DEAD;

/* If the abort is happening in interrupt context,
* that means that execution will never return to the
* thread's stack and that the abort is known to be
* complete. Otherwise the thread still runs a bit
* until it can swap, requiring a delay.
*/
if (IS_ENABLED(CONFIG_SMP) && arch_is_in_isr()) {
Contributor: I'm not sure I understand this. z_thread_single_abort() can be called with a thread object running on another CPU, but we're checking if the current CPU is in an ISR. Do we need to check the state of the CPU that the thread is running on?

mask |= _THREAD_ABORTED_IN_ISR;
}

thread->base.thread_state |= mask;
}

sys_trace_thread_abort(thread);
@@ -616,10 +636,9 @@ void z_thread_priority_set(struct k_thread *thread, int prio)
{
bool need_sched = z_set_prio(thread, prio);

if (IS_ENABLED(CONFIG_SMP) &&
!IS_ENABLED(CONFIG_SCHED_IPI_SUPPORTED)) {
z_sched_ipi();
}
#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
arch_sched_ipi();
#endif

if (need_sched && _current->base.sched_locked == 0) {
z_reschedule_unlocked();
@@ -738,6 +757,8 @@ void *z_get_next_switch_handle(void *interrupted)
!IS_ENABLED(CONFIG_SCHED_IPI_SUPPORTED)) {
z_sched_ipi();
}

wait_for_switch(_current);
return _current->switch_handle;
}
#endif
@@ -1144,10 +1165,9 @@ void z_impl_k_wakeup(k_tid_t thread)
z_reschedule_unlocked();
}

if (IS_ENABLED(CONFIG_SMP) &&
!IS_ENABLED(CONFIG_SCHED_IPI_SUPPORTED)) {
z_sched_ipi();
}
#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
arch_sched_ipi();
#endif
}

#ifdef CONFIG_SMP
@@ -1180,7 +1200,9 @@ void z_sched_abort(struct k_thread *thread)
* it locally. Not all architectures support that, alas. If
* we don't have it, we need to wait for some other interrupt.
*/
key = k_spin_lock(&sched_spinlock);
thread->base.thread_state |= _THREAD_ABORTING;
k_spin_unlock(&sched_spinlock, key);
#ifdef CONFIG_SCHED_IPI_SUPPORTED
arch_sched_ipi();
#endif
@@ -1204,6 +1226,16 @@
k_busy_wait(100);
}
}

/* If the thread self-aborted (e.g. its own exit raced with
* this external abort) then even though it is flagged DEAD,
* it's still running until its next swap and thus the thread
* object is still in use. We have to resort to a fallback
* delay in that circumstance.
*/
if ((thread->base.thread_state & _THREAD_ABORTED_IN_ISR) == 0U) {
k_busy_wait(THREAD_ABORT_DELAY_US);
Contributor: I feel uneasy about this... is there some state in the other thread we can poll in a loop, instead of just an arbitrary delay?

Also, I'm assuming that this logic happens with interrupts unlocked?

Contributor (author): There is not, absent changing the way swap works internally. The thread stack is in use right up to the swap call, but the swapped-to context may have been interrupted; there is nowhere at the OS level to place code to run "after swap", it just swaps back to whatever it was doing. I tried to find a way to make this work such that the scheduler lock is held and synchronously released at swap time, but even then "synchronously" isn't synchronous enough, because the CPU is still running C code at the point where that spinlock gets released.

An architectural fix would have every call to z_swap set a value in the swapped-from thread struct after the last memory access to its data had occurred; then we could spin waiting for that.

And yes, this delay happens outside the lock.

Contributor: Thinking out loud here: if the race condition is two different contexts both trying to complete k_thread_abort() on the same thread object, could we introduce a per-thread object spinlock to synchronize this?

}
}
#endif

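As a thought experiment following the review discussion on z_sched_abort() above, a sketch of the "set a value after the last memory access" idea; the swap_done field and both helpers are invented for illustration and are not part of this PR or the tree.

```c
#include <kernel.h>

/* Hypothetical only: every swap path would flag the outgoing thread
 * after its final access to that thread's stack and struct, and the
 * aborting CPU would poll the flag instead of sleeping for a fixed
 * THREAD_ABORT_DELAY_US.
 */

/* Called at the very tail of the arch-level swap, after the last
 * access to "old".
 */
static inline void mark_stack_released(struct k_thread *old)
{
	old->base.swap_done = 1;	/* invented field */
}

/* Would replace the fixed k_busy_wait(THREAD_ABORT_DELAY_US) in
 * z_sched_abort().
 */
static inline void wait_stack_released(struct k_thread *thread)
{
	while (thread->base.swap_done == 0U) {
		k_busy_wait(1);
	}
}
```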
5 changes: 5 additions & 0 deletions kernel/thread.c
@@ -525,6 +525,11 @@ void z_setup_new_thread(struct k_thread *new_thread,
arch_new_thread(new_thread, stack, stack_size, entry, p1, p2, p3,
prio, options);

#ifdef CONFIG_SMP
/* switch handle must be non-null except when inside z_swap() */
new_thread->switch_handle = new_thread;
#endif

#ifdef CONFIG_THREAD_USERSPACE_LOCAL_DATA
#ifndef CONFIG_THREAD_USERSPACE_LOCAL_DATA_ARCH_DEFER_SETUP
/* don't set again if the arch's own code in arch_new_thread() has
21 changes: 9 additions & 12 deletions samples/userspace/shared_mem/src/main.c
@@ -78,7 +78,6 @@ struct k_thread ct_thread;
K_THREAD_STACK_DEFINE(ct_stack, STACKSIZE);

_app_enc_d char encMSG[] = "ENC!\n";
_app_enc_d int enc_state = 1;
_app_enc_b char enc_pt[50]; /* Copy from shared pt */
_app_enc_b char enc_ct[50]; /* Copy to shared ct */

@@ -104,6 +103,15 @@ void main(void)
struct k_mem_partition *dom0_parts[] = {&part0, &part1};
k_tid_t tPT, tENC, tCT;

fBUFIN = 0; /* clear flags */
Contributor: Change looks good, but I'm still worried about how this was crashing by the emulator suddenly exiting. Even with a race, it should at least produce an exception that can be handled; if the whole thing triple faults, that tells me we might have a bug in arch/x86 somewhere that I should try to investigate more, unless this race somehow got the page tables corrupted. What do you think?

fBUFOUT = 0;
calc_rev_wheel((BYTE *) &W1, (BYTE *)&W1R);
calc_rev_wheel((BYTE *) &W2, (BYTE *)&W2R);
calc_rev_wheel((BYTE *) &W3, (BYTE *)&W3R);
IW1 = 0;
IW2 = 0;
IW3 = 0;

k_thread_access_grant(k_current_get(), &allforone);

/*
@@ -169,17 +177,6 @@ void enc(void)
{

int index, index_out;
if (enc_state == 1) {
fBUFIN = 0; /* clear flags */
fBUFOUT = 0;
calc_rev_wheel((BYTE *) &W1, (BYTE *)&W1R);
calc_rev_wheel((BYTE *) &W2, (BYTE *)&W2R);
calc_rev_wheel((BYTE *) &W3, (BYTE *)&W3R);
IW1 = 0;
IW2 = 0;
IW3 = 0;
enc_state = 0;
}

while (1) {
k_sem_take(&allforone, K_FOREVER);