
kernel/sched: Add CONFIG_SCHED_CPU_MASK_PIN_ONLY

Some SMP applications have threading designs where every thread
created is always assigned to a specific CPU, and they never want
threads scheduled symmetrically across CPUs under any circumstance.

In this situation, it's possible to optimize the run queue design a
bit to put a separate queue in each CPU struct instead of having a
single global one.  This is probably good for a few cycles per
scheduling event (maybe a bit more on architectures where cache
locality can be exploited) in circumstances where there is more than
one runnable thread.  It's a mild optimization, but a simple one.

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
Authored by Andy Ross, committed by Christopher Friedt
Commit b11e796c36 (branch pull/38955/head)
5 changed files:
  1. include/kernel_structs.h (6 lines changed)
  2. kernel/Kconfig (15 lines changed)
  3. kernel/include/kernel_offsets.h (2 lines changed)
  4. kernel/sched.c (55 lines changed)
  5. kernel/thread.c (6 lines changed)

include/kernel_structs.h (6 lines changed)

@@ -109,6 +109,10 @@ struct _cpu {
 	/* one assigned idle thread per CPU */
 	struct k_thread *idle_thread;
 
+#ifdef CONFIG_SCHED_CPU_MASK_PIN_ONLY
+	struct _ready_q ready_q;
+#endif
+
 #if (CONFIG_NUM_METAIRQ_PRIORITIES > 0) && (CONFIG_NUM_COOP_PRIORITIES > 0)
 	/* Coop thread preempted by current metairq, or NULL */
 	struct k_thread *metairq_preempted;

@@ -143,7 +147,9 @@ struct z_kernel {
 	 * ready queue: can be big, keep after small fields, since some
 	 * assembly (e.g. ARC) are limited in the encoding of the offset
 	 */
+#ifndef CONFIG_SCHED_CPU_MASK_PIN_ONLY
 	struct _ready_q ready_q;
+#endif
 
 #ifdef CONFIG_FPU_SHARING
 	/*

kernel/Kconfig (15 lines changed)

@@ -136,6 +136,21 @@ config SCHED_CPU_MASK
 	  CPU. With one CPU, it's just a higher overhead version of
 	  k_thread_start/stop().
 
+config SCHED_CPU_MASK_PIN_ONLY
+	bool "CPU mask variant with single-CPU pinning only"
+	depends on SMP && SCHED_CPU_MASK
+	help
+	  When true, enables a variant of SCHED_CPU_MASK where only
+	  one CPU may be specified for every thread. Effectively, all
+	  threads have a single "assigned" CPU and they will never be
+	  scheduled symmetrically. In general this is not helpful,
+	  but some applications have a carefully designed threading
+	  architecture and want to make their own decisions about how
+	  to assign work to CPUs. In that circumstance, some moderate
+	  optimizations can be made (e.g. having a separate run queue
+	  per CPU, keeping the list length shorter). Most
+	  applications don't want this.
+
 config MAIN_STACK_SIZE
 	int "Size of stack for initialization and main thread"
 	default 2048 if COVERAGE_GCOV
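For reference, a minimal sketch of the application configuration fragment that would select this variant; it is illustrative only (CONFIG_MP_NUM_CPUS=2 is an arbitrary example value for an SMP target), not part of the patch:

# prj.conf sketch (illustrative): enable the pin-only CPU mask variant
CONFIG_SMP=y
CONFIG_MP_NUM_CPUS=2
CONFIG_SCHED_CPU_MASK=y
CONFIG_SCHED_CPU_MASK_PIN_ONLY=y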

kernel/include/kernel_offsets.h (2 lines changed)

@@ -40,7 +40,9 @@ GEN_OFFSET_SYM(_kernel_t, threads);
 GEN_OFFSET_SYM(_kernel_t, idle);
 #endif
 
+#ifndef CONFIG_SCHED_CPU_MASK_PIN_ONLY
 GEN_OFFSET_SYM(_kernel_t, ready_q);
+#endif
 
 #ifndef CONFIG_SMP
 GEN_OFFSET_SYM(_ready_q_t, cache);

kernel/sched.c (55 lines changed)

@@ -16,6 +16,7 @@
 #include <kernel_internal.h>
 #include <logging/log.h>
 #include <sys/atomic.h>
+#include <sys/math_extras.h>
 
 LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL);
 
 #if defined(CONFIG_SCHED_DUMB)
@@ -187,19 +188,47 @@ ALWAYS_INLINE void z_priq_dumb_add(sys_dlist_t *pq, struct k_thread *thread)
 	sys_dlist_append(pq, &thread->base.qnode_dlist);
 }
 
-ALWAYS_INLINE void runq_add(struct k_thread *thread)
+static ALWAYS_INLINE void *thread_runq(struct k_thread *thread)
 {
-	_priq_run_add(&_kernel.ready_q.runq, thread);
+#ifdef CONFIG_SCHED_CPU_MASK_PIN_ONLY
+	int cpu, m = thread->base.cpu_mask;
+
+	/* Edge case: it's legal per the API to "make runnable" a
+	 * thread with all CPUs masked off (i.e. one that isn't
+	 * actually runnable!). Sort of a wart in the API and maybe
+	 * we should address this in docs/assertions instead to avoid
+	 * the extra test.
+	 */
+	cpu = m == 0 ? 0 : u32_count_trailing_zeros(m);
+
+	return &_kernel.cpus[cpu].ready_q.runq;
+#else
+	return &_kernel.ready_q.runq;
+#endif
 }
 
-ALWAYS_INLINE void runq_remove(struct k_thread *thread)
+static ALWAYS_INLINE void *curr_cpu_runq(void)
 {
-	_priq_run_remove(&_kernel.ready_q.runq, thread);
+#ifdef CONFIG_SCHED_CPU_MASK_PIN_ONLY
+	return &arch_curr_cpu()->ready_q.runq;
+#else
+	return &_kernel.ready_q.runq;
+#endif
 }
 
-ALWAYS_INLINE struct k_thread *runq_best(void)
+static ALWAYS_INLINE void runq_add(struct k_thread *thread)
 {
-	return _priq_run_best(&_kernel.ready_q.runq);
+	_priq_run_add(thread_runq(thread), thread);
+}
+
+static ALWAYS_INLINE void runq_remove(struct k_thread *thread)
+{
+	_priq_run_remove(thread_runq(thread), thread);
+}
+
+static ALWAYS_INLINE struct k_thread *runq_best(void)
+{
+	return _priq_run_best(curr_cpu_runq());
 }
 
 /* _current is never in the run queue until context switch on
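The new <sys/math_extras.h> include exists for u32_count_trailing_zeros(), which turns the (at most one) set bit in cpu_mask into a CPU index for picking the per-CPU run queue. A standalone sketch of that mapping, not part of the patch, with __builtin_ctz standing in for the helper and the mask_to_cpu() name made up for illustration:

/* Standalone sketch (illustrative): maps a one-hot cpu_mask to a CPU
 * index the way thread_runq() does.  __builtin_ctz (GCC/Clang) stands
 * in for u32_count_trailing_zeros() from <sys/math_extras.h>.
 */
#include <stdint.h>
#include <stdio.h>

static int mask_to_cpu(uint32_t m)
{
        /* An empty mask falls back to CPU 0, matching the edge case above */
        return (m == 0) ? 0 : __builtin_ctz(m);
}

int main(void)
{
        printf("%d %d %d %d\n",
               mask_to_cpu(0x0),   /* 0: no CPU enabled, falls back to 0 */
               mask_to_cpu(0x1),   /* 0: pinned to CPU 0 */
               mask_to_cpu(0x2),   /* 1: pinned to CPU 1 */
               mask_to_cpu(0x8));  /* 3: pinned to CPU 3 */
        return 0;
}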
@@ -1110,7 +1139,13 @@ void init_ready_q(struct _ready_q *rq)
 
 void z_sched_init(void)
 {
+#ifdef CONFIG_SCHED_CPU_MASK_PIN_ONLY
+	for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
+		init_ready_q(&_kernel.cpus[i].ready_q);
+	}
+#else
 	init_ready_q(&_kernel.ready_q);
+#endif
 
 #ifdef CONFIG_TIMESLICING
 	k_sched_time_slice_set(CONFIG_TIMESLICE_SIZE,
@@ -1433,6 +1468,14 @@ static int cpu_mask_mod(k_tid_t thread, uint32_t enable_mask, uint32_t disable_mask)
 			ret = -EINVAL;
 		}
 	}
+
+#if defined(CONFIG_ASSERT) && defined(CONFIG_SCHED_CPU_MASK_PIN_ONLY)
+	int m = thread->base.cpu_mask;
+
+	__ASSERT((m == 0) || ((m & (m - 1)) == 0),
+		 "Only one CPU allowed in mask when PIN_ONLY");
+#endif
+
 	return ret;
 }
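The assertion uses the usual bit trick: m & (m - 1) clears the lowest set bit, so the whole expression is true exactly when at most one CPU bit is set. A tiny illustrative check, not part of the patch (the at_most_one_cpu() helper name is made up):

/* Illustrative sketch mirroring the __ASSERT condition above:
 * true when the mask selects at most one CPU.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool at_most_one_cpu(uint32_t m)
{
        return (m == 0) || ((m & (m - 1)) == 0);
}

int main(void)
{
        assert(at_most_one_cpu(0x0));   /* no CPU selected: allowed by the API */
        assert(at_most_one_cpu(0x4));   /* only CPU 2: allowed */
        assert(!at_most_one_cpu(0x3));  /* CPUs 0 and 1: rejected under PIN_ONLY */
        return 0;
}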

kernel/thread.c (6 lines changed)

@@ -582,7 +582,11 @@ char *z_setup_new_thread(struct k_thread *new_thread,
 	}
 #endif
 #ifdef CONFIG_SCHED_CPU_MASK
-	new_thread->base.cpu_mask = -1;
+	if (IS_ENABLED(CONFIG_SCHED_CPU_MASK_PIN_ONLY)) {
+		new_thread->base.cpu_mask = 1; /* must specify only one cpu */
+	} else {
+		new_thread->base.cpu_mask = -1; /* allow all cpus */
+	}
 #endif
 #ifdef CONFIG_ARCH_HAS_CUSTOM_SWAP_TO_MAIN
 	/* _current may be null if the dummy thread is not used */
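With PIN_ONLY enabled, every new thread starts pinned to CPU 0 (mask = 1) and an application re-pins it with the existing k_thread_cpu_mask_*() API before the thread becomes runnable. A hypothetical usage sketch, not part of the patch (the worker/start_pinned_worker names, stack size, and priority are made up for illustration):

/* Hypothetical application sketch: pin a worker thread to CPU 1.
 * The CPU mask may only be changed while the thread is not runnable,
 * so the thread is created with a K_FOREVER delay and started
 * explicitly once the mask is set.
 */
#include <zephyr.h>

#define WORKER_STACK_SIZE 1024
#define WORKER_PRIO 5

K_THREAD_STACK_DEFINE(worker_stack, WORKER_STACK_SIZE);
static struct k_thread worker_thread;

static void worker(void *p1, void *p2, void *p3)
{
        /* ... per-CPU work ... */
}

void start_pinned_worker(void)
{
        k_tid_t tid = k_thread_create(&worker_thread, worker_stack,
                                      K_THREAD_STACK_SIZEOF(worker_stack),
                                      worker, NULL, NULL, NULL,
                                      WORKER_PRIO, 0, K_FOREVER);

        /* Leave exactly one CPU enabled, as PIN_ONLY requires */
        k_thread_cpu_mask_clear(tid);
        k_thread_cpu_mask_enable(tid, 1);

        k_thread_start(tid);
}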
