Browse Source

kernel: Begin abstracting out _sched_spinlock

In a uniprocessor (UP) system, _sched_spinlock may not need to be
held in all of the cases where it must be held in a multiprocessor
system. Removing those unnecessary usages can lead to better
performance on UP systems. In the case of uncontested taking
and giving of a semaphore, this can be as much as a 14%
performance gain.

Signed-off-by: Peter Mitsis <peter.mitsis@intel.com>
pull/80003/head
Peter Mitsis 9 months ago committed by Maureen Helm
parent
commit
cedd36106b
  1. 17
      kernel/include/ksched.h
  2. 4
      tests/benchmarks/sched/src/main.c

17
kernel/include/ksched.h

@@ -38,6 +38,12 @@ BUILD_ASSERT(K_LOWEST_APPLICATION_THREAD_PRIO
#define Z_ASSERT_VALID_PRIO(prio, entry_point) __ASSERT((prio) == -1, "")
#endif /* CONFIG_MULTITHREADING */
#if (CONFIG_MP_MAX_NUM_CPUS == 1)
#define LOCK_SCHED_SPINLOCK
#else
#define LOCK_SCHED_SPINLOCK K_SPINLOCK(&_sched_spinlock)
#endif
extern struct k_spinlock _sched_spinlock;
extern struct k_thread _thread_dummy;
@@ -159,11 +165,20 @@ static inline void unpend_thread_no_timeout(struct k_thread *thread)
thread->base.pended_on = NULL;
}
/*
* In a multiprocessor system, z_unpend_first_thread() must lock the scheduler
* spinlock _sched_spinlock. However, in a uniprocessor system, that is not
* necessary as the caller has already taken precautions (in the form of
* locking interrupts).
*/
static ALWAYS_INLINE struct k_thread *z_unpend_first_thread(_wait_q_t *wait_q)
{
struct k_thread *thread = NULL;
K_SPINLOCK(&_sched_spinlock) {
__ASSERT_EVAL(, int key = arch_irq_lock(); arch_irq_unlock(key),
!arch_irq_unlocked(key), "");
LOCK_SCHED_SPINLOCK {
thread = _priq_wait_best(&wait_q->waitq);
if (unlikely(thread != NULL)) {
unpend_thread_no_timeout(thread);

4
tests/benchmarks/sched/src/main.c

@@ -106,7 +106,10 @@ int main(void)
uint64_t tot = 0U;
uint32_t runs = 0U;
int key;
for (int i = 0; i < N_RUNS + N_SETTLE; i++) {
key = arch_irq_lock();
stamp(UNPENDING);
z_unpend_first_thread(&waitq);
stamp(UNPENDED_READYING);
@@ -121,6 +124,7 @@ int main(void)
*/
k_yield();
stamp(YIELDED);
arch_irq_unlock(key);
uint32_t avg, whole = stamps[4] - stamps[0];

Loading…
Cancel
Save