diff --git a/kernel/sched.c b/kernel/sched.c
index 6f57bd95589..d91a2d32cfb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1599,3 +1599,13 @@ int z_sched_waitq_walk(_wait_q_t *wait_q,
 
 	return status;
 }
+
+/* This routine exists for benchmarking purposes. It is not used in
+ * general production code.
+ */
+void z_unready_thread(struct k_thread *thread)
+{
+	K_SPINLOCK(&_sched_spinlock) {
+		unready_thread(thread);
+	}
+}
diff --git a/tests/benchmarks/sched_queues/CMakeLists.txt b/tests/benchmarks/sched_queues/CMakeLists.txt
new file mode 100644
index 00000000000..751e2e0233c
--- /dev/null
+++ b/tests/benchmarks/sched_queues/CMakeLists.txt
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: Apache-2.0
+
+cmake_minimum_required(VERSION 3.20.0)
+find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
+project(sched_queues)
+
+FILE(GLOB app_sources src/*.c)
+target_sources(app PRIVATE ${app_sources})
+target_include_directories(app PRIVATE
+  ${ZEPHYR_BASE}/kernel/include
+  ${ZEPHYR_BASE}/arch/${ARCH}/include
+  )
diff --git a/tests/benchmarks/sched_queues/Kconfig b/tests/benchmarks/sched_queues/Kconfig
new file mode 100644
index 00000000000..f952fe24e1f
--- /dev/null
+++ b/tests/benchmarks/sched_queues/Kconfig
@@ -0,0 +1,30 @@
+# Copyright (c) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+mainmenu "Scheduler Queue Benchmark"
+
+source "Kconfig.zephyr"
+
+config BENCHMARK_NUM_ITERATIONS
+	int "Number of iterations to gather data"
+	default 1000
+	help
+	  This option specifies the number of times each test will be executed
+	  before calculating the average times for reporting.
+
+config BENCHMARK_NUM_THREADS
+	int "Number of threads"
+	default 100
+	help
+	  This option specifies the maximum number of threads that the test
+	  will add to the ready queue. Increasing this value will place greater
+	  stress on the ready queue and better highlight the performance
+	  differences as the number of threads in the ready queue changes.
+
+config BENCHMARK_VERBOSE
+	bool "Display detailed results"
+	default y
+	help
+	  This option displays the average time of all the iterations done for
+	  each thread in the tests. This generates large amounts of output. To
+	  analyze it, it is recommended to redirect the output to a file.
diff --git a/tests/benchmarks/sched_queues/README.rst b/tests/benchmarks/sched_queues/README.rst
new file mode 100644
index 00000000000..94bd45d41f6
--- /dev/null
+++ b/tests/benchmarks/sched_queues/README.rst
@@ -0,0 +1,21 @@
+Scheduling Queue Measurements
+#############################
+
+A Zephyr application developer may choose between three different scheduling
+algorithms--dumb, scalable and multiq. These different algorithms have
+different performance characteristics--characteristics that vary as the
+number of ready threads increases. This benchmark can be used to help
+determine which scheduling algorithm may best suit the developer's application.
+
+This benchmark measures the following:
+* Time to add threads of increasing priority to the ready queue
+* Time to add threads of decreasing priority to the ready queue
+* Time to remove the highest priority thread from the ready queue
+* Time to remove the lowest priority thread from the ready queue
+
+By default, these tests show the minimum, maximum, and averages of the measured
+times. However, if the verbose option is enabled then the set of measured
+times will be displayed.
+The following will build this project with verbose support:
+
+    EXTRA_CONF_FILE="prj.verbose.conf" west build -p -b <board> <path to project>
diff --git a/tests/benchmarks/sched_queues/prj.conf b/tests/benchmarks/sched_queues/prj.conf
new file mode 100644
index 00000000000..2840887ae5e
--- /dev/null
+++ b/tests/benchmarks/sched_queues/prj.conf
@@ -0,0 +1,31 @@
+# Default base configuration file
+
+CONFIG_TEST=y
+
+# eliminate timer interrupts during the benchmark
+CONFIG_SYS_CLOCK_TICKS_PER_SEC=1
+
+# We use irq_offload(), enable it
+CONFIG_IRQ_OFFLOAD=y
+
+# Reduce memory/code footprint
+CONFIG_BT=n
+CONFIG_FORCE_NO_ASSERT=y
+
+CONFIG_TEST_HW_STACK_PROTECTION=n
+# Disable HW Stack Protection (see #28664)
+CONFIG_HW_STACK_PROTECTION=n
+CONFIG_COVERAGE=n
+
+# Disable system power management
+CONFIG_PM=n
+
+CONFIG_TIMING_FUNCTIONS=y
+
+CONFIG_HEAP_MEM_POOL_SIZE=2048
+CONFIG_APPLICATION_DEFINED_SYSCALL=y
+
+# Disable time slicing
+CONFIG_TIMESLICING=n
+
+CONFIG_SPEED_OPTIMIZATIONS=y
diff --git a/tests/benchmarks/sched_queues/prj.verbose.conf b/tests/benchmarks/sched_queues/prj.verbose.conf
new file mode 100644
index 00000000000..b6204397cea
--- /dev/null
+++ b/tests/benchmarks/sched_queues/prj.verbose.conf
@@ -0,0 +1,4 @@
+# Extra configuration file to enable verbose reporting
+# Use with EXTRA_CONF_FILE
+
+CONFIG_BENCHMARK_VERBOSE=y
diff --git a/tests/benchmarks/sched_queues/src/main.c b/tests/benchmarks/sched_queues/src/main.c
new file mode 100644
index 00000000000..669f0fd633e
--- /dev/null
+++ b/tests/benchmarks/sched_queues/src/main.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * @file
+ * This file contains the main testing module that invokes all the tests.
+ */
+
+/* NOTE(review): the original #include targets were lost in transit;
+ * restored from the symbols used below (printk, timing_*, TC_END_REPORT,
+ * z_ready_thread) -- verify against the original patch.
+ */
+#include <zephyr/kernel.h>
+#include <zephyr/timing/timing.h>
+#include "utils.h"
+#include <zephyr/tc_util.h>
+#include <ksched.h>
+
+#define TEST_STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE)
+#define BUSY_STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE)
+
+uint32_t tm_off;
+
+/*
+ * Warning!
+ * Most of the created threads in this test use the same stack!
+ * This is done to reduce the memory footprint as having unique stacks
+ * for hundreds or thousands of threads would require substantial memory.
+ * We can get away with this approach as the threads sharing the same
+ * stack will not be executing, even though they will be ready to run.
+ */
+
+static K_THREAD_STACK_DEFINE(test_stack, TEST_STACK_SIZE);
+
+/* One busy thread per secondary CPU -- presumably so that the ready test
+ * threads are never actually scheduled on another core (TODO confirm).
+ */
+K_THREAD_STACK_ARRAY_DEFINE(busy_stack, CONFIG_MP_MAX_NUM_CPUS - 1, BUSY_STACK_SIZE);
+static struct k_thread busy_thread[CONFIG_MP_MAX_NUM_CPUS - 1];
+
+static struct k_thread test_thread[CONFIG_BENCHMARK_NUM_THREADS];
+
+/* Per-thread cycle counts for adding to / removing from the ready queue,
+ * accumulated over all iterations.
+ */
+static uint64_t add_cycles[CONFIG_BENCHMARK_NUM_THREADS];
+static uint64_t remove_cycles[CONFIG_BENCHMARK_NUM_THREADS];
+
+extern void z_unready_thread(struct k_thread *thread);
+
+/* Endless loop executed by the busy threads on the other processors */
+static void busy_entry(void *p1, void *p2, void *p3)
+{
+	ARG_UNUSED(p1);
+	ARG_UNUSED(p2);
+	ARG_UNUSED(p3);
+
+	while (1) {
+	}
+}
+
+/**
+ * The test entry routine is not expected to execute.
+ */
+static void test_entry(void *p1, void *p2, void *p3)
+{
+	ARG_UNUSED(p2);
+	ARG_UNUSED(p3);
+
+	printk("Thread %u unexpectedly executed\n",
+	       (unsigned int)(uintptr_t)p1);
+
+	while (1) {
+	}
+}
+
+/* Create the busy threads (one per secondary CPU at cooperative priority)
+ * and <num_threads> test threads spread across the preemptible priorities
+ * (index 0 gets the numerically lowest, i.e. highest, priority).
+ */
+static void start_threads(unsigned int num_threads)
+{
+	unsigned int i;
+	unsigned int bucket_size;
+
+	/* Start the busy threads to execute on the other processors */
+
+	for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS - 1; i++) {
+		k_thread_create(&busy_thread[i], busy_stack[i], BUSY_STACK_SIZE,
+				busy_entry, NULL, NULL, NULL,
+				-1, 0, K_NO_WAIT);
+	}
+
+	bucket_size = (num_threads / CONFIG_NUM_PREEMPT_PRIORITIES) + 1;
+
+	/* Honor the <num_threads> parameter instead of hard-coding
+	 * CONFIG_BENCHMARK_NUM_THREADS (identical for the current caller,
+	 * but keeps this routine consistent with its argument).
+	 */
+	for (i = 0; i < num_threads; i++) {
+		k_thread_create(&test_thread[i], test_stack, TEST_STACK_SIZE,
+				test_entry, (void *)(uintptr_t)i, NULL, NULL,
+				i / bucket_size, 0, K_NO_WAIT);
+	}
+}
+
+/* Zero the accumulated cycle counts */
+static void cycles_reset(unsigned int num_threads)
+{
+	unsigned int i;
+
+	for (i = 0; i < num_threads; i++) {
+		add_cycles[i] = 0ULL;
+		remove_cycles[i] = 0ULL;
+	}
+}
+
+/* Remove threads from the ready queue starting with the lowest priority
+ * thread, then re-add them starting with the highest priority thread so
+ * each addition goes to the tail of the ready queue.
+ */
+static void test_decreasing_priority(unsigned int num_threads)
+{
+	unsigned int i;
+	timing_t start;
+	timing_t finish;
+
+	for (i = num_threads; i > 0; i--) {
+		start = timing_counter_get();
+		z_unready_thread(&test_thread[i - 1]);
+		finish = timing_counter_get();
+		remove_cycles[i - 1] += timing_cycles_get(&start, &finish);
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		start = timing_counter_get();
+		z_ready_thread(&test_thread[i]);
+		finish = timing_counter_get();
+		add_cycles[i] += timing_cycles_get(&start, &finish);
+	}
+}
+
+/* Remove threads from the ready queue starting with the highest priority
+ * thread, then re-add them starting with the lowest priority thread so
+ * each addition goes to the head of the ready queue.
+ */
+static void test_increasing_priority(unsigned int num_threads)
+{
+	unsigned int i;
+	timing_t start;
+	timing_t finish;
+
+	for (i = num_threads; i > 0; i--) {
+		start = timing_counter_get();
+		z_unready_thread(&test_thread[num_threads - i]);
+		finish = timing_counter_get();
+		remove_cycles[i - 1] += timing_cycles_get(&start, &finish);
+	}
+
+	for (i = num_threads; i > 0; i--) {
+		start = timing_counter_get();
+		z_ready_thread(&test_thread[i - 1]);
+		finish = timing_counter_get();
+		add_cycles[num_threads - i] += timing_cycles_get(&start, &finish);
+	}
+}
+
+/* Integer square root of a 64-bit value (used for the std deviation) */
+static uint64_t sqrt_u64(uint64_t square)
+{
+	if (square > 1) {
+		uint64_t lo = sqrt_u64(square >> 2) << 1;
+		uint64_t hi = lo + 1;
+
+		return ((hi * hi) > square) ? lo : hi;
+	}
+
+	return square;
+}
+
+/* Compute and display the min, max, average and standard deviation of
+ * the per-thread cycle counts accumulated in <cycles>.
+ */
+static void compute_and_report_stats(unsigned int num_threads,
+				     unsigned int num_iterations,
+				     uint64_t *cycles,
+				     const char *str)
+{
+	uint64_t minimum = cycles[0];
+	uint64_t maximum = cycles[0];
+	uint64_t total = cycles[0];
+	uint64_t average;
+	uint64_t std_dev = 0;
+	uint64_t tmp;
+	uint64_t diff;
+	unsigned int i;
+
+	for (i = 1; i < num_threads; i++) {
+		if (cycles[i] > maximum) {
+			maximum = cycles[i];
+		}
+
+		if (cycles[i] < minimum) {
+			minimum = cycles[i];
+		}
+
+		total += cycles[i];
+	}
+
+	minimum /= (uint64_t)num_iterations;
+	maximum /= (uint64_t)num_iterations;
+
+	/* Multiply in 64 bits: <num_threads> * <num_iterations> could
+	 * overflow an unsigned int for large Kconfig values.
+	 */
+	average = total / ((uint64_t)num_threads * num_iterations);
+
+	for (i = 0; i < num_threads; i++) {
+		tmp = cycles[i] / num_iterations;
+		diff = (average > tmp) ? (average - tmp) : (tmp - average);
+
+		std_dev += (diff * diff);
+	}
+	std_dev /= num_threads;
+	std_dev = sqrt_u64(std_dev);
+
+	printk("%s\n", str);
+
+	printk(" Minimum : %7llu cycles (%7u nsec)\n",
+	       minimum, (uint32_t)timing_cycles_to_ns(minimum));
+	printk(" Maximum : %7llu cycles (%7u nsec)\n",
+	       maximum, (uint32_t)timing_cycles_to_ns(maximum));
+	printk(" Average : %7llu cycles (%7u nsec)\n",
+	       average, (uint32_t)timing_cycles_to_ns(average));
+	printk(" Std Deviation: %7llu cycles (%7u nsec)\n",
+	       std_dev, (uint32_t)timing_cycles_to_ns(std_dev));
+}
+
+int main(void)
+{
+	unsigned int i;
+	unsigned int freq;
+#ifdef CONFIG_BENCHMARK_VERBOSE
+	char description[120];
+	char tag[50];
+	struct k_thread *thread;
+#endif
+
+	timing_init();
+
+	bench_test_init();
+
+	freq = timing_freq_get_mhz();
+
+	printk("Time Measurements for %s sched queues\n",
+	       IS_ENABLED(CONFIG_SCHED_DUMB) ?
+	       "dumb" :
+	       IS_ENABLED(CONFIG_SCHED_SCALABLE) ? "scalable" : "multiq");
+	printk("Timing results: Clock frequency: %u MHz\n", freq);
+
+	start_threads(CONFIG_BENCHMARK_NUM_THREADS);
+
+	timing_start();
+
+	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);
+
+	/* Phase 1: repeatedly empty and refill the ready queue such that
+	 * each added thread is of lower priority than the previous one.
+	 */
+	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
+		test_decreasing_priority(CONFIG_BENCHMARK_NUM_THREADS);
+	}
+
+	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
+				 CONFIG_BENCHMARK_NUM_ITERATIONS,
+				 add_cycles,
+				 "Add threads of decreasing priority");
+
+#ifdef CONFIG_BENCHMARK_VERBOSE
+	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
+		snprintf(tag, sizeof(tag),
+			 "ReadyQ.add.to.tail.%04u.waiters", i);
+		snprintf(description, sizeof(description),
+			 "%-40s - Add thread of priority (%u)",
+			 tag, test_thread[i].base.prio);
+		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
+				CONFIG_BENCHMARK_NUM_ITERATIONS);
+	}
+#endif
+
+	printk("------------------------------------\n");
+
+	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
+				 CONFIG_BENCHMARK_NUM_ITERATIONS,
+				 remove_cycles,
+				 "Remove threads of decreasing priority");
+
+#ifdef CONFIG_BENCHMARK_VERBOSE
+	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
+		snprintf(tag, sizeof(tag),
+			 "ReadyQ.remove.from.head.%04u.waiters", i);
+		snprintf(description, sizeof(description),
+			 "%-40s - Remove thread of priority %u",
+			 tag, test_thread[i].base.prio);
+		PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
+				CONFIG_BENCHMARK_NUM_ITERATIONS);
+	}
+#endif
+
+	printk("------------------------------------\n");
+
+	cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);
+
+	/* Phase 2: repeatedly empty and refill the ready queue such that
+	 * each added thread is of higher priority than the previous one.
+	 */
+	for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
+		test_increasing_priority(CONFIG_BENCHMARK_NUM_THREADS);
+	}
+
+	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
+				 CONFIG_BENCHMARK_NUM_ITERATIONS,
+				 add_cycles,
+				 "Add threads of increasing priority");
+
+#ifdef CONFIG_BENCHMARK_VERBOSE
+	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
+		snprintf(tag, sizeof(tag),
+			 "ReadyQ.add.to.head.%04u.waiters", i);
+		thread = &test_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
+		snprintf(description, sizeof(description),
+			 "%-40s - Add priority %u to readyq",
+			 tag, thread->base.prio);
+		PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
+				CONFIG_BENCHMARK_NUM_ITERATIONS);
+	}
+#endif
+
+	printk("------------------------------------\n");
+
+	/* Typo fix: banner read "Remove threads or increasing priority" */
+	compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS,
+				 CONFIG_BENCHMARK_NUM_ITERATIONS,
+				 remove_cycles,
+				 "Remove threads of increasing priority");
+
+#ifdef CONFIG_BENCHMARK_VERBOSE
+	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
+		snprintf(tag, sizeof(tag),
+			 "ReadyQ.remove.from.tail.%04u.waiters",
+			 CONFIG_BENCHMARK_NUM_THREADS - i);
+		thread = &test_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
+		snprintf(description, sizeof(description),
+			 "%-40s - Remove lowest priority from readyq (%u)",
+			 tag, thread->base.prio);
+		PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
+				CONFIG_BENCHMARK_NUM_ITERATIONS);
+	}
+#endif
+
+	/* Clean up the test threads (the busy threads are left spinning) */
+	for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
+		k_thread_abort(&test_thread[i]);
+	}
+
+	timing_stop();
+
+	TC_END_REPORT(0);
+
+	return 0;
+}
diff --git a/tests/benchmarks/sched_queues/src/utils.h b/tests/benchmarks/sched_queues/src/utils.h
new file mode 100644
index 00000000000..cca95dfc02a
--- /dev/null
+++ b/tests/benchmarks/sched_queues/src/utils.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef __BENCHMARK_SCHEDQ_UTILS_H
+#define __BENCHMARK_SCHEDQ_UTILS_H
+/*
+ * @brief This file contains macros used in the scheduler queue benchmarking.
+ */
+
+/* TODO(review): the original #include targets were lost in transit;
+ * restored from the symbols used below (printk, snprintk,
+ * timing_cycles_to_ns*) -- verify against the original patch.
+ */
+#include <zephyr/kernel.h>
+#include <zephyr/sys/printk.h>
+#include <zephyr/timing/timing.h>
+
+#ifdef CSV_FORMAT_OUTPUT
+#define FORMAT_STR   "%-74s,%s,%s\n"
+#define CYCLE_FORMAT "%8u"
+#define NSEC_FORMAT  "%8u"
+#else
+#define FORMAT_STR   "%-74s:%s , %s\n"
+#define CYCLE_FORMAT "%8u cycles"
+#define NSEC_FORMAT  "%8u ns"
+#endif
+
+/**
+ * @brief Display a line of statistics
+ *
+ * This macro displays the following:
+ *  1. Test description summary
+ *  2. Number of cycles
+ *  3. Number of nanoseconds
+ */
+#define PRINT_F(summary, cycles, nsec)                                   \
+	do {                                                             \
+		char cycle_str[32];                                      \
+		char nsec_str[32];                                       \
+									 \
+		snprintk(cycle_str, sizeof(cycle_str), CYCLE_FORMAT,     \
+			 cycles);                                        \
+		snprintk(nsec_str, sizeof(nsec_str), NSEC_FORMAT, nsec); \
+		printk(FORMAT_STR, summary, cycle_str, nsec_str);        \
+	} while (0)
+
+#define PRINT_STATS(summary, value)              \
+	PRINT_F(summary, value,                  \
+		(uint32_t)timing_cycles_to_ns(value))
+
+#define PRINT_STATS_AVG(summary, value, counter)            \
+	PRINT_F(summary, value / counter,                   \
+		(uint32_t)timing_cycles_to_ns_avg(value, counter))
+
+#endif /* __BENCHMARK_SCHEDQ_UTILS_H */
diff --git a/tests/benchmarks/sched_queues/testcase.yaml b/tests/benchmarks/sched_queues/testcase.yaml
new file mode 100644
index 00000000000..e3ecc3b3426
--- /dev/null
+++ b/tests/benchmarks/sched_queues/testcase.yaml
@@ -0,0 +1,25 @@
+common:
+  tags:
+    - kernel
+    - benchmark
+  integration_platforms:
+    - qemu_x86
+    - qemu_cortex_a53
+  harness: console
+  harness_config:
+    type: one_line
+    regex:
+      - "PROJECT EXECUTION SUCCESSFUL"
+
+tests:
+  benchmark.sched_queues.dumb:
+    extra_configs:
+      - CONFIG_SCHED_DUMB=y
+
+  benchmark.sched_queues.scalable:
+    extra_configs:
+      - CONFIG_SCHED_SCALABLE=y
+
+  benchmark.sched_queues.multiq:
+    extra_configs:
+      - CONFIG_SCHED_MULTIQ=y