Add a selftest to validate the behavior of the NUMA-aware scheduler functionalities, including idle CPU selection within nodes, per-node DSQs and CPU to node mapping.
Signed-off-by: Andrea Righi arighi@nvidia.com --- tools/testing/selftests/sched_ext/Makefile | 1 + tools/testing/selftests/sched_ext/numa.bpf.c | 100 +++++++++++++++++++ tools/testing/selftests/sched_ext/numa.c | 59 +++++++++++ 3 files changed, 160 insertions(+) create mode 100644 tools/testing/selftests/sched_ext/numa.bpf.c create mode 100644 tools/testing/selftests/sched_ext/numa.c
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 0117622246007..f4531327b8e76 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -172,6 +172,7 @@ auto-test-targets := \ maximal \ maybe_null \ minimal \ + numa \ prog_run \ reload_loop \ select_cpu_dfl \ diff --git a/tools/testing/selftests/sched_ext/numa.bpf.c b/tools/testing/selftests/sched_ext/numa.bpf.c new file mode 100644 index 0000000000000..a79d86ed54a1b --- /dev/null +++ b/tools/testing/selftests/sched_ext/numa.bpf.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A scheduler that validates the behavior of the NUMA-aware + * functionalities. + * + * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks + * are exclusively processed by CPUs within their respective nodes. Idle + * CPUs are selected only within the same node, so task migration can only + * occurs between CPUs belonging to the same node. + * + * Copyright (c) 2025 Andrea Righi arighi@nvidia.com + */ + +#include <scx/common.bpf.h> + +char _license[] SEC("license") = "GPL"; + +UEI_DEFINE(uei); + +const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE; + +static bool is_cpu_idle(s32 cpu, int node) +{ + const struct cpumask *idle_cpumask; + bool idle; + + idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node); + idle = bpf_cpumask_test_cpu(cpu, idle_cpumask); + scx_bpf_put_cpumask(idle_cpumask); + + return idle; +} + +s32 BPF_STRUCT_OPS(numa_select_cpu, + struct task_struct *p, s32 prev_cpu, u64 wake_flags) +{ + int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p)); + s32 cpu; + + /* + * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here, + * since it already tries to pick an idle CPU within the node + * first, but let's use both functions for better testing coverage. + */ + cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node, + __COMPAT_SCX_PICK_IDLE_IN_NODE); + if (cpu < 0) + cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node, + __COMPAT_SCX_PICK_IDLE_IN_NODE); + + if (is_cpu_idle(cpu, node)) + scx_bpf_error("CPU %d should be marked as busy", cpu); + + if (__COMPAT_scx_bpf_cpu_node(cpu) != node) + scx_bpf_error("CPU %d should be in node %d", cpu, node); + + return cpu; +} + +void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags) +{ + int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p)); + + scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags); +} + +void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev) +{ + int node = __COMPAT_scx_bpf_cpu_node(cpu); + + scx_bpf_dsq_move_to_local(node); +} + +s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init) +{ + int node, err; + + bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) { + err = scx_bpf_create_dsq(node, node); + if (err) + return err; + } + + return 0; +} + +void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei) +{ + UEI_RECORD(uei, ei); +} + +SEC(".struct_ops.link") +struct sched_ext_ops numa_ops = { + .select_cpu = (void *)numa_select_cpu, + .enqueue = (void *)numa_enqueue, + .dispatch = (void *)numa_dispatch, + .init = (void *)numa_init, + .exit = (void *)numa_exit, + .name = "numa", +}; diff --git a/tools/testing/selftests/sched_ext/numa.c b/tools/testing/selftests/sched_ext/numa.c new file mode 100644 index 0000000000000..b060c3b65c82b --- /dev/null +++ b/tools/testing/selftests/sched_ext/numa.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025 Andrea Righi arighi@nvidia.com + */ +#include <bpf/bpf.h> +#include <scx/common.h> +#include <sys/wait.h> +#include <unistd.h> +#include "numa.bpf.skel.h" +#include "scx_test.h" + +static enum scx_test_status setup(void **ctx) +{ + struct numa *skel; + + skel = numa__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + skel->rodata->__COMPAT_SCX_PICK_IDLE_IN_NODE = SCX_PICK_IDLE_IN_NODE; + skel->struct_ops.numa_ops->flags = SCX_OPS_BUILTIN_IDLE_PER_NODE; + SCX_FAIL_IF(numa__load(skel), "Failed to load skel"); + + *ctx = skel; + + return SCX_TEST_PASS; +} + +static enum scx_test_status run(void *ctx) +{ + struct numa *skel = ctx; + struct bpf_link *link; + + link = bpf_map__attach_struct_ops(skel->maps.numa_ops); + SCX_FAIL_IF(!link, "Failed to attach scheduler"); + + /* Just sleeping is fine, plenty of scheduling events happening */ + sleep(1); + + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE)); + bpf_link__destroy(link); + + return SCX_TEST_PASS; +} + +static void cleanup(void *ctx) +{ + struct numa *skel = ctx; + + numa__destroy(skel); +} + +struct scx_test numa = { + .name = "numa", + .description = "Verify NUMA-aware functionalities", + .setup = setup, + .run = run, + .cleanup = cleanup, +}; +REGISTER_SCX_TEST(&numa)
linux-kselftest-mirror@lists.linaro.org