
kernel: Add CONFIG_ARCH_HAS_DIRECTED_IPIS

Platforms that support IPIs allow them to be broadcast via the
new arch_sched_broadcast_ipi() routine (replacing arch_sched_ipi()).
Those that also allow IPIs to be directed to specific CPUs may
use arch_sched_directed_ipi() to do so.

As the kernel has the capability to track which CPUs may need an IPI
(see CONFIG_IPI_OPTIMIZE), this commit updates the signalling of
tracked IPIs to use the directed version if supported; otherwise
they continue to use the broadcast version.

Platforms that allow directed IPIs may see a significant reduction
in the number of IPI-related ISRs when CONFIG_IPI_OPTIMIZE is
enabled and the number of CPUs increases.  These platforms can be
identified by the Kconfig option CONFIG_ARCH_HAS_DIRECTED_IPIS.

Signed-off-by: Peter Mitsis <peter.mitsis@intel.com>
 arch/Kconfig                                 | 11
 arch/arc/core/smp.c                          | 19
 arch/arc/include/kernel_arch_func.h          |  2
 arch/arm/core/cortex_a_r/Kconfig             |  1
 arch/arm/core/cortex_a_r/smp.c               | 17
 arch/arm64/core/smp.c                        | 19
 arch/riscv/core/smp.c                        | 11
 arch/x86/core/intel64/smp.c                  |  2
 doc/kernel/services/smp/smp.rst              | 18
 include/zephyr/arch/arch_interface.h         | 12
 kernel/Kconfig.smp                           | 11
 kernel/ipi.c                                 |  6
 kernel/sched.c                               |  6
 soc/espressif/esp32/esp32-mp.c               | 10
 soc/intel/intel_adsp/ace/multiprocessing.c   | 23
 soc/intel/intel_adsp/cavs/multiprocessing.c  | 16
 tests/kernel/smp/src/main.c                  | 10
 17 files changed
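
The per-arch changes below follow a common pattern: the architecture implements
arch_sched_directed_ipi() against its interrupt controller and derives the
broadcast form from it. A minimal sketch of that pattern, assuming Zephyr's
arch_num_cpus()/BIT() helpers and the IPI_ALL_CPUS_MASK macro from
kernel/include/ipi.h; platform_raise_ipi() is a hypothetical stand-in for the
controller-specific trigger (an SGI, ICI, MSIP write, etc.):

	/* Sketch only: raise the IPI on each CPU whose bit is set. */
	void arch_sched_directed_ipi(uint32_t cpu_bitmap)
	{
		unsigned int num_cpus = arch_num_cpus();

		for (unsigned int i = 0; i < num_cpus; i++) {
			if ((cpu_bitmap & BIT(i)) != 0) {
				platform_raise_ipi(i); /* hypothetical trigger */
			}
		}
	}

	/* Broadcast is just "directed at every CPU". */
	void arch_sched_broadcast_ipi(void)
	{
		arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
	}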

11
arch/Kconfig

@@ -24,6 +24,7 @@ config ARC
 	imply XIP
 	select ARCH_HAS_THREAD_LOCAL_STORAGE
 	select ARCH_SUPPORTS_ROM_START
+	select ARCH_HAS_DIRECTED_IPIS
 	help
 	  ARC architecture

@@ -50,6 +51,7 @@ config ARM64
 	select USE_SWITCH_SUPPORTED
 	select IRQ_OFFLOAD_NESTED if IRQ_OFFLOAD
 	select BARRIER_OPERATIONS_ARCH
+	select ARCH_HAS_DIRECTED_IPIS
 	help
 	  ARM64 (AArch64) architecture

@@ -115,6 +117,7 @@ config RISCV
 	select USE_SWITCH_SUPPORTED
 	select USE_SWITCH
 	select SCHED_IPI_SUPPORTED if SMP
+	select ARCH_HAS_DIRECTED_IPIS
 	select BARRIER_OPERATIONS_BUILTIN
 	imply XIP
 	help

@@ -129,6 +132,7 @@ config XTENSA
 	select ARCH_HAS_CODE_DATA_RELOCATION
 	select ARCH_HAS_TIMING_FUNCTIONS
 	select ARCH_MEM_DOMAIN_DATA if USERSPACE
+	select ARCH_HAS_DIRECTED_IPIS
 	help
 	  Xtensa architecture

@@ -746,6 +750,13 @@ config ARCH_HAS_RESERVED_PAGE_FRAMES
 	  memory mappings. The architecture will need to implement
 	  arch_reserved_pages_update().
 
+config ARCH_HAS_DIRECTED_IPIS
+	bool
+	help
+	  This hidden configuration should be selected by the architecture if
+	  it has an implementation for arch_sched_directed_ipi() which allows
+	  for IPIs to be directed to specific CPUs.
+
 config CPU_HAS_DCACHE
 	bool
 	help

19
arch/arc/core/smp.c

@@ -13,6 +13,7 @@
 #include <zephyr/kernel.h>
 #include <zephyr/kernel_structs.h>
 #include <ksched.h>
+#include <ipi.h>
 #include <zephyr/init.h>
 #include <zephyr/irq.h>
 #include <arc_irq_offload.h>

@@ -130,21 +131,27 @@ static void sched_ipi_handler(const void *unused)
 	z_sched_ipi();
 }
 
-/* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
 {
-	uint32_t i;
+	unsigned int i;
+	unsigned int num_cpus = arch_num_cpus();
 
-	/* broadcast sched_ipi request to other cores
+	/* Send sched_ipi request to other cores
 	 * if the target is current core, hardware will ignore it
 	 */
-	unsigned int num_cpus = arch_num_cpus();
-
 	for (i = 0U; i < num_cpus; i++) {
-		z_arc_connect_ici_generate(i);
+		if ((cpu_bitmap & BIT(i)) != 0) {
+			z_arc_connect_ici_generate(i);
+		}
 	}
 }
 
+void arch_sched_broadcast_ipi(void)
+{
+	arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
+}
+
 int arch_smp_init(void)
 {
 	struct arc_connect_bcr bcr;

2
arch/arc/include/kernel_arch_func.h

@@ -64,8 +64,6 @@ extern void z_arc_userspace_enter(k_thread_entry_t user_entry, void *p1,
 extern void z_arc_fatal_error(unsigned int reason, const struct arch_esf *esf);
 
-extern void arch_sched_ipi(void);
-
 extern void z_arc_switch(void *switch_to, void **switched_from);
 
 static inline void arch_switch(void *switch_to, void **switched_from)

1
arch/arm/core/cortex_a_r/Kconfig

@@ -131,6 +131,7 @@ config AARCH32_ARMV8_R
 	bool
 	select ATOMIC_OPERATIONS_BUILTIN
 	select SCHED_IPI_SUPPORTED if SMP
+	select ARCH_HAS_DIRECTED_IPIS
 	help
 	  This option signifies the use of an ARMv8-R AArch32 processor
 	  implementation.

17
arch/arm/core/cortex_a_r/smp.c

@@ -7,6 +7,7 @@
 #include <zephyr/kernel.h>
 #include <zephyr/arch/arm/cortex_a_r/lib_helpers.h>
 #include <zephyr/drivers/interrupt_controller/gic.h>
+#include <ipi.h>
 #include "boot.h"
 #include "zephyr/cache.h"
 #include "zephyr/kernel/thread_stack.h"

@@ -210,7 +211,7 @@ void arch_secondary_cpu_init(void)
 
 #ifdef CONFIG_SMP
 
-static void broadcast_ipi(unsigned int ipi)
+static void send_ipi(unsigned int ipi, uint32_t cpu_bitmap)
 {
 	uint32_t mpidr = MPIDR_TO_CORE(GET_MPIDR());

@@ -220,6 +221,10 @@ static void broadcast_ipi(unsigned int ipi)
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (int i = 0; i < num_cpus; i++) {
+		if ((cpu_bitmap & BIT(i)) == 0) {
+			continue;
+		}
+
 		uint32_t target_mpidr = cpu_map[i];
 		uint8_t aff0;

@@ -239,10 +244,14 @@ void sched_ipi_handler(const void *unused)
 	z_sched_ipi();
 }
 
-/* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_broadcast_ipi(void)
+{
+	send_ipi(SGI_SCHED_IPI, IPI_ALL_CPUS_MASK);
+}
+
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
 {
-	broadcast_ipi(SGI_SCHED_IPI);
+	send_ipi(SGI_SCHED_IPI, cpu_bitmap);
 }
 
 int arch_smp_init(void)

19
arch/arm64/core/smp.c

@@ -16,6 +16,7 @@
 #include <zephyr/kernel.h>
 #include <zephyr/kernel_structs.h>
 #include <ksched.h>
+#include <ipi.h>
 #include <zephyr/init.h>
 #include <zephyr/arch/arm64/mm.h>
 #include <zephyr/arch/cpu.h>

@@ -180,7 +181,7 @@ void arch_secondary_cpu_init(int cpu_num)
 
 #ifdef CONFIG_SMP
 
-static void broadcast_ipi(unsigned int ipi)
+static void send_ipi(unsigned int ipi, uint32_t cpu_bitmap)
 {
 	uint64_t mpidr = MPIDR_TO_CORE(GET_MPIDR());

@@ -190,6 +191,10 @@ static void broadcast_ipi(unsigned int ipi)
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (int i = 0; i < num_cpus; i++) {
+		if ((cpu_bitmap & BIT(i)) == 0) {
+			continue;
+		}
+
 		uint64_t target_mpidr = cpu_map[i];
 		uint8_t aff0;

@@ -209,10 +214,14 @@ void sched_ipi_handler(const void *unused)
 	z_sched_ipi();
 }
 
-/* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_broadcast_ipi(void)
+{
+	send_ipi(SGI_SCHED_IPI, IPI_ALL_CPUS_MASK);
+}
+
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
 {
-	broadcast_ipi(SGI_SCHED_IPI);
+	send_ipi(SGI_SCHED_IPI, cpu_bitmap);
 }
 
 #ifdef CONFIG_USERSPACE

@@ -232,7 +241,7 @@ void mem_cfg_ipi_handler(const void *unused)
 
 void z_arm64_mem_cfg_ipi(void)
 {
-	broadcast_ipi(SGI_MMCFG_IPI);
+	send_ipi(SGI_MMCFG_IPI, IPI_ALL_CPUS_MASK);
 }
 
 #endif

11
arch/riscv/core/smp.c

@@ -7,6 +7,7 @@
 #include <zephyr/init.h>
 #include <zephyr/kernel.h>
 #include <ksched.h>
+#include <ipi.h>
 #include <zephyr/irq.h>
 #include <zephyr/sys/atomic.h>
 #include <zephyr/arch/riscv/irq.h>

@@ -86,14 +87,15 @@ static atomic_val_t cpu_pending_ipi[CONFIG_MP_MAX_NUM_CPUS];
 #define IPI_SCHED	0
 #define IPI_FPU_FLUSH	1
 
-void arch_sched_ipi(void)
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
 {
 	unsigned int key = arch_irq_lock();
 	unsigned int id = _current_cpu->id;
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (unsigned int i = 0; i < num_cpus; i++) {
-		if (i != id && _kernel.cpus[i].arch.online) {
+		if ((i != id) && _kernel.cpus[i].arch.online &&
+		    ((cpu_bitmap & BIT(i)) != 0)) {
 			atomic_set_bit(&cpu_pending_ipi[i], IPI_SCHED);
 			MSIP(_kernel.cpus[i].arch.hartid) = 1;
 		}

@@ -102,6 +104,11 @@ void arch_sched_ipi(void)
 	arch_irq_unlock(key);
 }
 
+void arch_sched_broadcast_ipi(void)
+{
+	arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
+}
+
 #ifdef CONFIG_FPU_SHARING
 void arch_flush_fpu_ipi(unsigned int cpu)
 {

2
arch/x86/core/intel64/smp.c

@@ -34,7 +34,7 @@ int arch_smp_init(void)
  * it is not clear exactly how/where/why to abstract this, as it
  * assumes the use of a local APIC (but there's no other mechanism).
  */
-void arch_sched_ipi(void)
+void arch_sched_broadcast_ipi(void)
 {
 	z_loapic_ipi(0, LOAPIC_ICR_IPI_OTHERS, CONFIG_SCHED_IPI_VECTOR);
 }

18
doc/kernel/services/smp/smp.rst

@@ -180,13 +180,17 @@ handle the newly-runnable load.
 
 So where possible, Zephyr SMP architectures should implement an
 interprocessor interrupt.  The current framework is very simple: the
-architecture provides a :c:func:`arch_sched_ipi` call, which when invoked
-will flag an interrupt on all CPUs (except the current one, though
-that is allowed behavior) which will then invoke the :c:func:`z_sched_ipi`
-function implemented in the scheduler.  The expectation is that these
-APIs will evolve over time to encompass more functionality
-(e.g. cross-CPU calls), and that the scheduler-specific calls here
-will be implemented in terms of a more general framework.
+architecture provides at least a :c:func:`arch_sched_broadcast_ipi` call,
+which when invoked will flag an interrupt on all CPUs (except the current one,
+though that is allowed behavior). If the architecture supports directed IPIs
+(see :kconfig:option:`CONFIG_ARCH_HAS_DIRECTED_IPIS`), then the
+architecture also provides a :c:func:`arch_sched_directed_ipi` call, which
+when invoked will flag an interrupt on the specified CPUs. When an interrupt is
+flagged on the CPUs, the :c:func:`z_sched_ipi` function implemented in the
+scheduler will get invoked on those CPUs. The expectation is that these
+APIs will evolve over time to encompass more functionality (e.g. cross-CPU
+calls), and that the scheduler-specific calls here will be implemented in
+terms of a more general framework.
 
 Note that not all SMP architectures will have a usable IPI mechanism
 (either missing, or just undocumented/unimplemented).  In those cases
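
On the receiving side nothing changes with this commit: whichever variant
raised the interrupt, the per-arch ISR simply forwards to the scheduler on the
interrupted CPU. A sketch of that handler shape, mirroring the
sched_ipi_handler() context visible in the arch diffs in this commit:

	/* Per-arch sched IPI ISR: hand off to the scheduler. */
	static void sched_ipi_handler(const void *unused)
	{
		ARG_UNUSED(unused);

		z_sched_ipi();
	}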

12
include/zephyr/arch/arch_interface.h

@@ -494,10 +494,18 @@ static inline uint32_t arch_proc_id(void);
 /**
  * Broadcast an interrupt to all CPUs
  *
- * This will invoke z_sched_ipi() on other CPUs in the system.
+ * This will invoke z_sched_ipi() on all other CPUs in the system.
  */
-void arch_sched_ipi(void);
+void arch_sched_broadcast_ipi(void);
+
+/**
+ * Direct IPIs to the specified CPUs
+ *
+ * This will invoke z_sched_ipi() on the CPUs identified by @a cpu_bitmap.
+ *
+ * @param cpu_bitmap A bitmap indicating which CPUs need the IPI
+ */
+void arch_sched_directed_ipi(uint32_t cpu_bitmap);
 
 int arch_smp_init(void);

11
kernel/Kconfig.smp

@@ -56,12 +56,11 @@ config MP_MAX_NUM_CPUS
 config SCHED_IPI_SUPPORTED
 	bool
 	help
-	  True if the architecture supports a call to
-	  arch_sched_ipi() to broadcast an interrupt that will call
-	  z_sched_ipi() on other CPUs in the system. Required for
-	  k_thread_abort() to operate with reasonable latency
-	  (otherwise we might have to wait for the other thread to
-	  take an interrupt, which can be arbitrarily far in the
+	  True if the architecture supports a call to arch_sched_broadcast_ipi()
+	  to broadcast an interrupt that will call z_sched_ipi() on other CPUs
+	  in the system. Required for k_thread_abort() to operate with
+	  reasonable latency (otherwise we might have to wait for the other
+	  thread to take an interrupt, which can be arbitrarily far in the
 	  future).
 
 config TRACE_SCHED_IPI

6
kernel/ipi.c

@@ -81,7 +81,11 @@ void signal_pending_ipi(void)
 	cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi);
 
 	if (cpu_bitmap != 0) {
-		arch_sched_ipi();
+#ifdef CONFIG_ARCH_HAS_DIRECTED_IPIS
+		arch_sched_directed_ipi(cpu_bitmap);
+#else
+		arch_sched_broadcast_ipi();
+#endif
 	}
 }
 #endif /* CONFIG_SCHED_IPI_SUPPORTED */
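
Here cpu_bitmap is the kernel's pending-IPI mask, one bit per CPU id. For
reference, a sketch of the two mask helpers this hunk and the kernel/sched.c
hunk below rely on; the authoritative definitions live in kernel/include/ipi.h
and may differ in detail:

	/* Assumed helpers (sketch): one bit per CPU id. */
	#define IPI_CPU_MASK(cpu_id)  (1 << (cpu_id))
	#define IPI_ALL_CPUS_MASK     ((1 << CONFIG_MP_MAX_NUM_CPUS) - 1)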

6
kernel/sched.c

@@ -474,7 +474,11 @@ static void z_thread_halt(struct k_thread *thread, k_spinlock_key_t key,
 		thread->base.thread_state |= (terminate ? _THREAD_ABORTING
 						: _THREAD_SUSPENDING);
 #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
-		arch_sched_ipi();
+#ifdef CONFIG_ARCH_HAS_DIRECTED_IPIS
+		arch_sched_directed_ipi(IPI_CPU_MASK(cpu->id));
+#else
+		arch_sched_broadcast_ipi();
+#endif
 #endif
 		if (arch_is_in_isr()) {
 			thread_halt_spin(thread, key);

10
soc/espressif/esp32/esp32-mp.c

@@ -12,6 +12,7 @@
 #include <zephyr/drivers/interrupt_controller/intc_esp32.h>
 #include <soc.h>
 #include <ksched.h>
+#include <ipi.h>
 #include <zephyr/device.h>
 #include <zephyr/kernel.h>
 #include <zephyr/spinlock.h>

@@ -290,10 +291,12 @@ void arch_cpu_start(int cpu_num, k_thread_stack_t *stack, int sz,
 	smp_log("ESP32: APPCPU initialized");
 }
 
-void arch_sched_ipi(void)
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
 {
 	const int core_id = esp_core_id();
 
+	ARG_UNUSED(cpu_bitmap);
+
 	if (core_id == 0) {
 		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_0_REG, DPORT_CPU_INTR_FROM_CPU_0);
 	} else {

@@ -301,6 +304,11 @@ void arch_sched_ipi(void)
 	}
 }
 
+void arch_sched_broadcast_ipi(void)
+{
+	arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
+}
+
 IRAM_ATTR bool arch_cpu_active(int cpu_num)
 {
 	return cpus_active[cpu_num];

23
soc/intel/intel_adsp/ace/multiprocessing.c

@@ -21,6 +21,7 @@
 #include <adsp_interrupt.h>
 #include <zephyr/irq.h>
 #include <zephyr/cache.h>
+#include <ipi.h>
 
 #define CORE_POWER_CHECK_NUM 128

@@ -209,7 +210,7 @@ void soc_mp_startup(uint32_t cpu)
 #ifndef CONFIG_XTENSA_MMU
 ALWAYS_INLINE
 #endif
-static void send_ipi(uint32_t msg)
+static void send_ipi(uint32_t msg, uint32_t cpu_bitmap)
 {
 	uint32_t curr = arch_proc_id();

@@ -217,24 +218,30 @@ static void send_ipi(uint32_t msg)
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (int core = 0; core < num_cpus; core++) {
-		if (core != curr && soc_cpus_active[core]) {
+		if ((core != curr) && soc_cpus_active[core] &&
+		    ((cpu_bitmap & BIT(core)) != 0)) {
 			IDC[core].agents[1].ipc.idr = msg | INTEL_ADSP_IPC_BUSY;
 		}
 	}
 }
 
-void arch_sched_ipi(void)
-{
-	send_ipi(0);
-}
-
 #if defined(CONFIG_XTENSA_MMU) && (CONFIG_MP_MAX_NUM_CPUS > 1)
 void xtensa_mmu_tlb_ipi(void)
 {
-	send_ipi(IPI_TLB_FLUSH);
+	send_ipi(IPI_TLB_FLUSH, IPI_ALL_CPUS_MASK);
 }
 #endif
 
+void arch_sched_broadcast_ipi(void)
+{
+	send_ipi(0, IPI_ALL_CPUS_MASK);
+}
+
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
+{
+	send_ipi(0, cpu_bitmap);
+}
+
 #if CONFIG_MP_MAX_NUM_CPUS > 1
 int soc_adsp_halt_cpu(int id)
 {

16
soc/intel/intel_adsp/cavs/multiprocessing.c

@@ -8,6 +8,7 @@
 #include <zephyr/irq.h>
 #include <zephyr/pm/pm.h>
 #include <zephyr/cache.h>
+#include <ipi.h>
 
 /* IDC power up message to the ROM firmware. This isn't documented
  * anywhere, it's basically just a magic number (except the high bit,

@@ -121,18 +122,29 @@ void soc_start_core(int cpu_num)
 	IDC[curr_cpu].core[cpu_num].itc = IDC_MSG_POWER_UP;
 }
 
-void arch_sched_ipi(void)
+static void send_ipi(uint32_t cpu_bitmap)
 {
 	uint32_t curr = arch_proc_id();
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (int c = 0; c < num_cpus; c++) {
-		if (c != curr && soc_cpus_active[c]) {
+		if ((c != curr) && soc_cpus_active[c] &&
+		    ((cpu_bitmap & BIT(c)) != 0)) {
 			IDC[curr].core[c].itc = BIT(31);
 		}
 	}
 }
 
+void arch_sched_broadcast_ipi(void)
+{
+	send_ipi(IPI_ALL_CPUS_MASK);
+}
+
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
+{
+	send_ipi(cpu_bitmap);
+}
+
 void idc_isr(const void *param)
 {
 	ARG_UNUSED(param);

10
tests/kernel/smp/src/main.c

@@ -695,8 +695,8 @@ void z_trace_sched_ipi(void)
  * - To verify architecture layer provides a mechanism to issue an interprocessor
  *   interrupt to all other CPUs in the system that calls the scheduler IPI.
  *   We simply add a hook in z_sched_ipi(), in order to check if it has been
- *   called once in another CPU except the caller, when arch_sched_ipi() is
- *   called.
+ *   called once in another CPU except the caller, when arch_sched_broadcast_ipi()
+ *   is called.
  *
  * Testing techniques:
  * - Interface testing, function and block box testing,

@@ -711,7 +711,7 @@ void z_trace_sched_ipi(void)
  *
  * Test Procedure:
  * -# In main thread, given a global variable sched_ipi_has_called equaled zero.
- * -# Call arch_sched_ipi() then sleep for 100ms.
+ * -# Call arch_sched_broadcast_ipi() then sleep for 100ms.
  * -# In z_sched_ipi() handler, increment the sched_ipi_has_called.
  * -# In main thread, check the sched_ipi_has_called is not equaled to zero.
  * -# Repeat step 1 to 4 for 3 times.

@@ -727,7 +727,7 @@ void z_trace_sched_ipi(void)
  * - This test using for the platform that support SMP, in our current scenario
  *   , only x86_64 and arc supported.
  *
- * @see arch_sched_ipi()
+ * @see arch_sched_broadcast_ipi()
  */
 #ifdef CONFIG_SCHED_IPI_SUPPORTED
 ZTEST(smp, test_smp_ipi)

@@ -741,7 +741,7 @@ ZTEST(smp, test_smp_ipi)
 	for (int i = 0; i < 3 ; i++) {
 		/* issue a sched ipi to tell other CPU to run thread */
 		sched_ipi_has_called = 0;
-		arch_sched_ipi();
+		arch_sched_broadcast_ipi();
 
 		/* Need to wait longer than we think, loaded CI
 		 * systems need to wait for host scheduling to run the