diff --git a/arch/Kconfig b/arch/Kconfig
index f3e4f14dfbc..f1295eec38e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -24,6 +24,7 @@ config ARC
 	imply XIP
 	select ARCH_HAS_THREAD_LOCAL_STORAGE
 	select ARCH_SUPPORTS_ROM_START
+	select ARCH_HAS_DIRECTED_IPIS
 	help
 	  ARC architecture
 
@@ -50,6 +51,7 @@ config ARM64
 	select USE_SWITCH_SUPPORTED
 	select IRQ_OFFLOAD_NESTED if IRQ_OFFLOAD
 	select BARRIER_OPERATIONS_ARCH
+	select ARCH_HAS_DIRECTED_IPIS
 	help
 	  ARM64 (AArch64) architecture
 
@@ -115,6 +117,7 @@ config RISCV
 	select USE_SWITCH_SUPPORTED
 	select USE_SWITCH
 	select SCHED_IPI_SUPPORTED if SMP
+	select ARCH_HAS_DIRECTED_IPIS
 	select BARRIER_OPERATIONS_BUILTIN
 	imply XIP
 	help
@@ -129,6 +132,7 @@ config XTENSA
 	select ARCH_HAS_CODE_DATA_RELOCATION
 	select ARCH_HAS_TIMING_FUNCTIONS
 	select ARCH_MEM_DOMAIN_DATA if USERSPACE
+	select ARCH_HAS_DIRECTED_IPIS
 	help
 	  Xtensa architecture
 
@@ -746,6 +750,13 @@ config ARCH_HAS_RESERVED_PAGE_FRAMES
 	  memory mappings. The architecture will need to implement
 	  arch_reserved_pages_update().
 
+config ARCH_HAS_DIRECTED_IPIS
+	bool
+	help
+	  This hidden configuration should be selected by the architecture if
+	  it has an implementation for arch_sched_directed_ipi(), which allows
+	  IPIs to be directed to specific CPUs.
+
 config CPU_HAS_DCACHE
 	bool
 	help
diff --git a/arch/arc/core/smp.c b/arch/arc/core/smp.c
index 9f8ee38a4a1..aa12623db80 100644
--- a/arch/arc/core/smp.c
+++ b/arch/arc/core/smp.c
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -130,21 +131,27 @@ static void sched_ipi_handler(const void *unused)
 	z_sched_ipi();
 }
 
-/* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
 {
-	uint32_t i;
+	unsigned int i;
+	unsigned int num_cpus = arch_num_cpus();
 
-	/* broadcast sched_ipi request to other cores
+	/* Send sched_ipi request to other cores
 	 * if the target is current core, hardware will ignore it
 	 */
-	unsigned int num_cpus = arch_num_cpus();
-
 	for (i = 0U; i < num_cpus; i++) {
-		z_arc_connect_ici_generate(i);
+		if ((cpu_bitmap & BIT(i)) != 0) {
+			z_arc_connect_ici_generate(i);
+		}
 	}
 }
 
+void arch_sched_broadcast_ipi(void)
+{
+	arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
+}
+
 int arch_smp_init(void)
 {
 	struct arc_connect_bcr bcr;
diff --git a/arch/arc/include/kernel_arch_func.h b/arch/arc/include/kernel_arch_func.h
index 65a497e02d0..ca382a274f4 100644
--- a/arch/arc/include/kernel_arch_func.h
+++ b/arch/arc/include/kernel_arch_func.h
@@ -64,8 +64,6 @@ extern void z_arc_userspace_enter(k_thread_entry_t user_entry, void *p1,
 
 extern void z_arc_fatal_error(unsigned int reason, const struct arch_esf *esf);
 
-extern void arch_sched_ipi(void);
-
 extern void z_arc_switch(void *switch_to, void **switched_from);
 
 static inline void arch_switch(void *switch_to, void **switched_from)
diff --git a/arch/arm/core/cortex_a_r/Kconfig b/arch/arm/core/cortex_a_r/Kconfig
index 3ec57cc408e..4095a277c61 100644
--- a/arch/arm/core/cortex_a_r/Kconfig
+++ b/arch/arm/core/cortex_a_r/Kconfig
@@ -131,6 +131,7 @@ config AARCH32_ARMV8_R
 	bool
 	select ATOMIC_OPERATIONS_BUILTIN
 	select SCHED_IPI_SUPPORTED if SMP
+	select ARCH_HAS_DIRECTED_IPIS
 	help
 	  This option signifies the use of an ARMv8-R AArch32 processor
 	  implementation.
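Review note: the reworked implementations in this patch depend on `IPI_ALL_CPUS_MASK` (and, in kernel/sched.c, `IPI_CPU_MASK()`), but the definitions of those helpers are not part of this diff. For review context, here is a plausible shape, assuming they live in the kernel's internal `ipi.h` and use one bit per CPU — a hedged reconstruction, not a quote from the tree:

```c
/* Assumed layout (hypothetical sketch of the kernel's internal ipi.h):
 * bit N set in a cpu_bitmap means CPU N should receive the IPI.
 */
#define IPI_CPU_MASK(cpu_id)  BIT(cpu_id)
#define IPI_ALL_CPUS_MASK     BIT_MASK(CONFIG_MP_MAX_NUM_CPUS)
```

With that layout, implementing arch_sched_broadcast_ipi() as arch_sched_directed_ipi(IPI_ALL_CPUS_MASK), as the ARC change above does, makes broadcast a strict special case of the directed path.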
diff --git a/arch/arm/core/cortex_a_r/smp.c b/arch/arm/core/cortex_a_r/smp.c
index 9e06730f913..379b7663d01 100644
--- a/arch/arm/core/cortex_a_r/smp.c
+++ b/arch/arm/core/cortex_a_r/smp.c
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include "boot.h"
 #include "zephyr/cache.h"
 #include "zephyr/kernel/thread_stack.h"
@@ -210,7 +211,7 @@ void arch_secondary_cpu_init(void)
 
 #ifdef CONFIG_SMP
 
-static void broadcast_ipi(unsigned int ipi)
+static void send_ipi(unsigned int ipi, uint32_t cpu_bitmap)
 {
 	uint32_t mpidr = MPIDR_TO_CORE(GET_MPIDR());
 
@@ -220,6 +221,10 @@ static void broadcast_ipi(unsigned int ipi)
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (int i = 0; i < num_cpus; i++) {
+		if ((cpu_bitmap & BIT(i)) == 0) {
+			continue;
+		}
+
 		uint32_t target_mpidr = cpu_map[i];
 		uint8_t aff0;
 
@@ -239,10 +244,14 @@ void sched_ipi_handler(const void *unused)
 	z_sched_ipi();
 }
 
-/* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_broadcast_ipi(void)
+{
+	send_ipi(SGI_SCHED_IPI, IPI_ALL_CPUS_MASK);
+}
+
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
 {
-	broadcast_ipi(SGI_SCHED_IPI);
+	send_ipi(SGI_SCHED_IPI, cpu_bitmap);
 }
 
 int arch_smp_init(void)
diff --git a/arch/arm64/core/smp.c b/arch/arm64/core/smp.c
index 8777c400766..31dfcf337e4 100644
--- a/arch/arm64/core/smp.c
+++ b/arch/arm64/core/smp.c
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -180,7 +181,7 @@ void arch_secondary_cpu_init(int cpu_num)
 
 #ifdef CONFIG_SMP
 
-static void broadcast_ipi(unsigned int ipi)
+static void send_ipi(unsigned int ipi, uint32_t cpu_bitmap)
 {
 	uint64_t mpidr = MPIDR_TO_CORE(GET_MPIDR());
 
@@ -190,6 +191,10 @@ static void broadcast_ipi(unsigned int ipi)
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (int i = 0; i < num_cpus; i++) {
+		if ((cpu_bitmap & BIT(i)) == 0) {
+			continue;
+		}
+
 		uint64_t target_mpidr = cpu_map[i];
 		uint8_t aff0;
 
@@ -209,10 +214,14 @@ void sched_ipi_handler(const void *unused)
 	z_sched_ipi();
 }
 
-/* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_broadcast_ipi(void)
+{
+	send_ipi(SGI_SCHED_IPI, IPI_ALL_CPUS_MASK);
+}
+
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
 {
-	broadcast_ipi(SGI_SCHED_IPI);
+	send_ipi(SGI_SCHED_IPI, cpu_bitmap);
 }
 
 #ifdef CONFIG_USERSPACE
@@ -232,7 +241,7 @@ void mem_cfg_ipi_handler(const void *unused)
 
 void z_arm64_mem_cfg_ipi(void)
 {
-	broadcast_ipi(SGI_MMCFG_IPI);
+	send_ipi(SGI_MMCFG_IPI, IPI_ALL_CPUS_MASK);
 }
 
 #endif
diff --git a/arch/riscv/core/smp.c b/arch/riscv/core/smp.c
index 68147f8880a..b5b94aac25c 100644
--- a/arch/riscv/core/smp.c
+++ b/arch/riscv/core/smp.c
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -86,14 +87,15 @@ static atomic_val_t cpu_pending_ipi[CONFIG_MP_MAX_NUM_CPUS];
 #define IPI_SCHED	0
 #define IPI_FPU_FLUSH	1
 
-void arch_sched_ipi(void)
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
 {
 	unsigned int key = arch_irq_lock();
 	unsigned int id = _current_cpu->id;
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (unsigned int i = 0; i < num_cpus; i++) {
-		if (i != id && _kernel.cpus[i].arch.online) {
+		if ((i != id) && _kernel.cpus[i].arch.online &&
+		    ((cpu_bitmap & BIT(i)) != 0)) {
 			atomic_set_bit(&cpu_pending_ipi[i], IPI_SCHED);
 			MSIP(_kernel.cpus[i].arch.hartid) = 1;
 		}
@@ -102,6 +104,11 @@ void arch_sched_ipi(void)
 	arch_irq_unlock(key);
 }
 
+void arch_sched_broadcast_ipi(void)
+{
+	arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
+}
+
 #ifdef CONFIG_FPU_SHARING
 void arch_flush_fpu_ipi(unsigned int cpu)
 {
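The three ports above all add the same filter to their existing fan-out loops: a core is signaled only if its bit is set in cpu_bitmap, and the calling core is skipped (by hardware on ARC, by an explicit check elsewhere). A self-contained illustration of that selection logic, with a hypothetical trigger_ipi() standing in for z_arc_connect_ici_generate(), the GIC SGI write, or the RISC-V MSIP store:

```c
#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1U << (n))

/* Hypothetical stand-in for the per-core doorbell write in the real ports. */
static void trigger_ipi(unsigned int cpu)
{
	printf("IPI -> CPU %u\n", cpu);
}

/* Same selection logic as the reworked send_ipi() loops: signal only the
 * CPUs whose bit is set in the bitmap, never the calling CPU itself.
 */
static void send_ipi(uint32_t cpu_bitmap, unsigned int num_cpus, unsigned int self)
{
	for (unsigned int i = 0; i < num_cpus; i++) {
		if ((i != self) && ((cpu_bitmap & BIT(i)) != 0)) {
			trigger_ipi(i);
		}
	}
}

int main(void)
{
	send_ipi(BIT(1) | BIT(3), 4, 0);	/* signals CPUs 1 and 3 only */
	return 0;
}
```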
diff --git a/arch/x86/core/intel64/smp.c b/arch/x86/core/intel64/smp.c
index a73ba9c8f38..b0232f21984 100644
--- a/arch/x86/core/intel64/smp.c
+++ b/arch/x86/core/intel64/smp.c
@@ -34,7 +34,7 @@ int arch_smp_init(void)
  * it is not clear exactly how/where/why to abstract this, as it
  * assumes the use of a local APIC (but there's no other mechanism).
  */
-void arch_sched_ipi(void)
+void arch_sched_broadcast_ipi(void)
 {
 	z_loapic_ipi(0, LOAPIC_ICR_IPI_OTHERS, CONFIG_SCHED_IPI_VECTOR);
 }
diff --git a/doc/kernel/services/smp/smp.rst b/doc/kernel/services/smp/smp.rst
index ca1e0149ad5..4b178432bd5 100644
--- a/doc/kernel/services/smp/smp.rst
+++ b/doc/kernel/services/smp/smp.rst
@@ -180,13 +180,17 @@ handle the newly-runnable load.
 
 So where possible, Zephyr SMP architectures should implement an
 interprocessor interrupt.  The current framework is very simple: the
-architecture provides a :c:func:`arch_sched_ipi` call, which when invoked
-will flag an interrupt on all CPUs (except the current one, though
-that is allowed behavior) which will then invoke the :c:func:`z_sched_ipi`
-function implemented in the scheduler.  The expectation is that these
-APIs will evolve over time to encompass more functionality
-(e.g. cross-CPU calls), and that the scheduler-specific calls here
-will be implemented in terms of a more general framework.
+architecture provides at least a :c:func:`arch_sched_broadcast_ipi` call,
+which when invoked will flag an interrupt on all CPUs (except the current
+one, though that is allowed behavior). If the architecture supports directed
+IPIs (see :kconfig:option:`CONFIG_ARCH_HAS_DIRECTED_IPIS`), then it also
+provides a :c:func:`arch_sched_directed_ipi` call, which when invoked will
+flag an interrupt on the specified CPUs. When an interrupt is flagged on a
+CPU, the :c:func:`z_sched_ipi` function implemented in the scheduler is
+invoked on that CPU. The expectation is that these APIs will evolve over
+time to encompass more functionality (e.g. cross-CPU calls), and that the
+scheduler-specific calls here will be implemented in terms of a more
+general framework.
 
 Note that not all SMP architectures will have a usable IPI mechanism
 (either missing, or just undocumented/unimplemented).  In those cases
diff --git a/include/zephyr/arch/arch_interface.h b/include/zephyr/arch/arch_interface.h
index 797a60bbaa5..d7c33e511ce 100644
--- a/include/zephyr/arch/arch_interface.h
+++ b/include/zephyr/arch/arch_interface.h
@@ -494,10 +494,18 @@ static inline uint32_t arch_proc_id(void);
 /**
  * Broadcast an interrupt to all CPUs
  *
- * This will invoke z_sched_ipi() on other CPUs in the system.
+ * This will invoke z_sched_ipi() on all other CPUs in the system.
  */
-void arch_sched_ipi(void);
+void arch_sched_broadcast_ipi(void);
+
+/**
+ * Direct IPIs to the specified CPUs
+ *
+ * This will invoke z_sched_ipi() on the CPUs identified by @a cpu_bitmap.
+ *
+ * @param cpu_bitmap A bitmap indicating which CPUs need the IPI
+ */
+void arch_sched_directed_ipi(uint32_t cpu_bitmap);
 
 int arch_smp_init(void);
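The header change above fixes the contract for ports: arch_sched_broadcast_ipi() is mandatory for SCHED_IPI_SUPPORTED, and arch_sched_directed_ipi() is additionally required when the port selects ARCH_HAS_DIRECTED_IPIS. A minimal sketch of what a new port would add, assuming a hypothetical my_soc_trigger_ipi() doorbell helper (not a real Zephyr API):

```c
#include <zephyr/kernel.h>
#include <zephyr/sys/util.h>
#include <ipi.h>	/* assumed internal kernel header for IPI_ALL_CPUS_MASK */

/* Hypothetical SoC doorbell: interrupts exactly one core. */
extern void my_soc_trigger_ipi(unsigned int cpu);

void arch_sched_directed_ipi(uint32_t cpu_bitmap)
{
	unsigned int num_cpus = arch_num_cpus();

	for (unsigned int i = 0; i < num_cpus; i++) {
		if ((cpu_bitmap & BIT(i)) != 0) {
			my_soc_trigger_ipi(i);
		}
	}
}

/* With directed IPIs as the primitive, broadcast is just "all bits set";
 * this is the pattern the ARC, RISC-V, and ESP32 changes follow.
 */
void arch_sched_broadcast_ipi(void)
{
	arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
}
```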
diff --git a/kernel/Kconfig.smp b/kernel/Kconfig.smp
index 04fc01801b3..da83d1624e0 100644
--- a/kernel/Kconfig.smp
+++ b/kernel/Kconfig.smp
@@ -56,12 +56,11 @@ config MP_MAX_NUM_CPUS
 config SCHED_IPI_SUPPORTED
 	bool
 	help
-	  True if the architecture supports a call to
-	  arch_sched_ipi() to broadcast an interrupt that will call
-	  z_sched_ipi() on other CPUs in the system.  Required for
-	  k_thread_abort() to operate with reasonable latency
-	  (otherwise we might have to wait for the other thread to
-	  take an interrupt, which can be arbitrarily far in the
+	  True if the architecture supports a call to arch_sched_broadcast_ipi()
+	  to broadcast an interrupt that will call z_sched_ipi() on other CPUs
+	  in the system. Required for k_thread_abort() to operate with
+	  reasonable latency (otherwise we might have to wait for the other
+	  thread to take an interrupt, which can be arbitrarily far in the
 	  future).
 
 config TRACE_SCHED_IPI
diff --git a/kernel/ipi.c b/kernel/ipi.c
index 9985c9485c2..ee01c459425 100644
--- a/kernel/ipi.c
+++ b/kernel/ipi.c
@@ -81,7 +81,11 @@ void signal_pending_ipi(void)
 	cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi);
 
 	if (cpu_bitmap != 0) {
-		arch_sched_ipi();
+#ifdef CONFIG_ARCH_HAS_DIRECTED_IPIS
+		arch_sched_directed_ipi(cpu_bitmap);
+#else
+		arch_sched_broadcast_ipi();
+#endif
 	}
 }
 #endif /* CONFIG_SCHED_IPI_SUPPORTED */
diff --git a/kernel/sched.c b/kernel/sched.c
index 34b256c22a8..67e5645bc6f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -474,7 +474,11 @@ static void z_thread_halt(struct k_thread *thread, k_spinlock_key_t key,
 		thread->base.thread_state |= (terminate ? _THREAD_ABORTING
 							: _THREAD_SUSPENDING);
 #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
-		arch_sched_ipi();
+#ifdef CONFIG_ARCH_HAS_DIRECTED_IPIS
+		arch_sched_directed_ipi(IPI_CPU_MASK(cpu->id));
+#else
+		arch_sched_broadcast_ipi();
+#endif
 #endif
 		if (arch_is_in_isr()) {
 			thread_halt_spin(thread, key);
diff --git a/soc/espressif/esp32/esp32-mp.c b/soc/espressif/esp32/esp32-mp.c
index c380df6c8b7..ca2de23e1e8 100644
--- a/soc/espressif/esp32/esp32-mp.c
+++ b/soc/espressif/esp32/esp32-mp.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -290,10 +291,12 @@ void arch_cpu_start(int cpu_num, k_thread_stack_t *stack, int sz,
 	smp_log("ESP32: APPCPU initialized");
 }
 
-void arch_sched_ipi(void)
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
 {
 	const int core_id = esp_core_id();
 
+	ARG_UNUSED(cpu_bitmap);
+
 	if (core_id == 0) {
 		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_0_REG, DPORT_CPU_INTR_FROM_CPU_0);
 	} else {
@@ -301,6 +304,11 @@ void arch_sched_ipi(void)
 	}
 }
 
+void arch_sched_broadcast_ipi(void)
+{
+	arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
+}
+
 IRAM_ATTR bool arch_cpu_active(int cpu_num)
 {
 	return cpus_active[cpu_num];
diff --git a/soc/intel/intel_adsp/ace/multiprocessing.c b/soc/intel/intel_adsp/ace/multiprocessing.c
index 68b8693a520..1c9b3fa3d32 100644
--- a/soc/intel/intel_adsp/ace/multiprocessing.c
+++ b/soc/intel/intel_adsp/ace/multiprocessing.c
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 
 #define CORE_POWER_CHECK_NUM 128
 
@@ -209,7 +210,7 @@ void soc_mp_startup(uint32_t cpu)
 #ifndef CONFIG_XTENSA_MMU
 ALWAYS_INLINE
 #endif
-static void send_ipi(uint32_t msg)
+static void send_ipi(uint32_t msg, uint32_t cpu_bitmap)
 {
 	uint32_t curr = arch_proc_id();
 
@@ -217,24 +218,30 @@ static void send_ipi(uint32_t msg)
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (int core = 0; core < num_cpus; core++) {
-		if (core != curr && soc_cpus_active[core]) {
+		if ((core != curr) && soc_cpus_active[core] &&
+		    ((cpu_bitmap & BIT(core)) != 0)) {
 			IDC[core].agents[1].ipc.idr = msg | INTEL_ADSP_IPC_BUSY;
 		}
 	}
 }
 
-void arch_sched_ipi(void)
-{
-	send_ipi(0);
-}
-
 #if defined(CONFIG_XTENSA_MMU) && (CONFIG_MP_MAX_NUM_CPUS > 1)
 void xtensa_mmu_tlb_ipi(void)
 {
-	send_ipi(IPI_TLB_FLUSH);
+	send_ipi(IPI_TLB_FLUSH, IPI_ALL_CPUS_MASK);
 }
 #endif
 
+void arch_sched_broadcast_ipi(void)
+{
+	send_ipi(0, IPI_ALL_CPUS_MASK);
+}
+
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
+{
+	send_ipi(0, cpu_bitmap);
+}
+
 #if CONFIG_MP_MAX_NUM_CPUS > 1
 int soc_adsp_halt_cpu(int id)
 {
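The kernel/ipi.c and kernel/sched.c hunks above are where the new API pays off: signal_pending_ipi() drains the accumulated pending mask into a single directed IPI, and z_thread_halt() now targets only the CPU running the thread being halted. A sketch of that producer/consumer handoff; the flag_ipi() producer side is assumed, since it is not shown in this diff:

```c
#include <zephyr/sys/atomic.h>

static atomic_t pending_ipi;	/* stands in for _kernel.pending_ipi */

/* Producer (assumed shape): the scheduler ORs in the bits of the CPUs
 * that need to reschedule.
 */
static void flag_ipi(uint32_t ipi_mask)
{
	(void)atomic_or(&pending_ipi, (atomic_val_t)ipi_mask);
}

/* Consumer, cf. signal_pending_ipi(): atomically take ownership of all
 * pending bits, then issue one directed IPI covering exactly those CPUs
 * (or fall back to a broadcast on ports without directed support).
 */
static void drain_pending_ipi(void)
{
	uint32_t cpu_bitmap = (uint32_t)atomic_clear(&pending_ipi);

	if (cpu_bitmap != 0) {
#ifdef CONFIG_ARCH_HAS_DIRECTED_IPIS
		arch_sched_directed_ipi(cpu_bitmap);
#else
		arch_sched_broadcast_ipi();
#endif
	}
}
```

Note that the ESP32 hunk keeps ARG_UNUSED(cpu_bitmap): with only two cores, the interrupt register already targets "the other core", so directed and broadcast behavior coincide there.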
diff --git a/soc/intel/intel_adsp/cavs/multiprocessing.c b/soc/intel/intel_adsp/cavs/multiprocessing.c
index 2a38f20355d..d87cd435e57 100644
--- a/soc/intel/intel_adsp/cavs/multiprocessing.c
+++ b/soc/intel/intel_adsp/cavs/multiprocessing.c
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 
 /* IDC power up message to the ROM firmware.  This isn't documented
  * anywhere, it's basically just a magic number (except the high bit,
@@ -121,18 +122,29 @@ void soc_start_core(int cpu_num)
 	IDC[curr_cpu].core[cpu_num].itc = IDC_MSG_POWER_UP;
 }
 
-void arch_sched_ipi(void)
+static void send_ipi(uint32_t cpu_bitmap)
 {
 	uint32_t curr = arch_proc_id();
 	unsigned int num_cpus = arch_num_cpus();
 
 	for (int c = 0; c < num_cpus; c++) {
-		if (c != curr && soc_cpus_active[c]) {
+		if ((c != curr) && soc_cpus_active[c] &&
+		    ((cpu_bitmap & BIT(c)) != 0)) {
 			IDC[curr].core[c].itc = BIT(31);
 		}
 	}
 }
 
+void arch_sched_broadcast_ipi(void)
+{
+	send_ipi(IPI_ALL_CPUS_MASK);
+}
+
+void arch_sched_directed_ipi(uint32_t cpu_bitmap)
+{
+	send_ipi(cpu_bitmap);
+}
+
 void idc_isr(const void *param)
 {
 	ARG_UNUSED(param);
diff --git a/tests/kernel/smp/src/main.c b/tests/kernel/smp/src/main.c
index f73a1dfdbbb..7f556793e67 100644
--- a/tests/kernel/smp/src/main.c
+++ b/tests/kernel/smp/src/main.c
@@ -695,8 +695,8 @@ void z_trace_sched_ipi(void)
  * - To verify architecture layer provides a mechanism to issue an interprocessor
  *   interrupt to all other CPUs in the system that calls the scheduler IPI.
  *   We simply add a hook in z_sched_ipi(), in order to check if it has been
- *   called once in another CPU except the caller, when arch_sched_ipi() is
- *   called.
+ *   called once on another CPU (excluding the caller) when
+ *   arch_sched_broadcast_ipi() is called.
  *
  * Testing techniques:
  * - Interface testing, function and block box testing,
@@ -711,7 +711,7 @@ void z_trace_sched_ipi(void)
  *
  * Test Procedure:
  * -# In main thread, given a global variable sched_ipi_has_called equaled zero.
- * -# Call arch_sched_ipi() then sleep for 100ms.
+ * -# Call arch_sched_broadcast_ipi() then sleep for 100ms.
  * -# In z_sched_ipi() handler, increment the sched_ipi_has_called.
  * -# In main thread, check the sched_ipi_has_called is not equaled to zero.
  * -# Repeat step 1 to 4 for 3 times.
@@ -727,7 +727,7 @@ void z_trace_sched_ipi(void)
  * - This test using for the platform that support SMP, in our current scenario
  *   , only x86_64 and arc supported.
  *
- * @see arch_sched_ipi()
+ * @see arch_sched_broadcast_ipi()
  */
 #ifdef CONFIG_SCHED_IPI_SUPPORTED
 ZTEST(smp, test_smp_ipi)
@@ -741,7 +741,7 @@ ZTEST(smp, test_smp_ipi)
 	for (int i = 0; i < 3 ; i++) {
 		/* issue a sched ipi to tell other CPU to run thread */
 		sched_ipi_has_called = 0;
-		arch_sched_ipi();
+		arch_sched_broadcast_ipi();
 
 		/* Need to wait longer than we think, loaded CI
 		 * systems need to wait for host scheduling to run the
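The updated test still exercises only the broadcast path. A directed-IPI companion test could look like the following hypothetical sketch (the test name and the single-target strategy are assumptions, not part of this patch; it would also need the internal ipi.h for IPI_CPU_MASK()):

```c
#if defined(CONFIG_SCHED_IPI_SUPPORTED) && defined(CONFIG_ARCH_HAS_DIRECTED_IPIS)
/* Hypothetical companion to test_smp_ipi: direct the IPI at CPU 1 only and
 * check that the z_sched_ipi() hook fired. With only the shared counter
 * available this proves "some other CPU was signaled"; a per-CPU counter
 * would be needed to prove no extra CPUs were hit.
 */
ZTEST(smp, test_smp_directed_ipi)
{
	sched_ipi_has_called = 0;

	arch_sched_directed_ipi(IPI_CPU_MASK(1));
	k_msleep(100);

	zassert_true(sched_ipi_has_called != 0,
		     "directed sched_ipi was not received");
}
#endif
```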