getrusage() does a lot more than what the SQPOLL accounting needs; the latter only cares about (and uses) the stime. Rather than do a full RUSAGE_SELF summation, just query the used stime instead.
Cc: stable@vger.kernel.org
Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/fdinfo.c |  9 +++++----
 io_uring/sqpoll.c | 34 ++++++++++++++++++++--------------
 io_uring/sqpoll.h |  1 +
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index ff3364531c77..966e06b078f6 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -59,7 +59,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 {
 	struct io_overflow_cqe *ocqe;
 	struct io_rings *r = ctx->rings;
-	struct rusage sq_usage;
 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
 	unsigned int sq_head = READ_ONCE(r->sq.head);
 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
@@ -152,14 +151,16 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 	 * thread termination.
 	 */
 	if (tsk) {
+		struct timespec64 ts;
+
 		get_task_struct(tsk);
 		rcu_read_unlock();
-		getrusage(tsk, RUSAGE_SELF, &sq_usage);
+		ts = io_sq_cpu_time(tsk);
 		put_task_struct(tsk);
 		sq_pid = sq->task_pid;
 		sq_cpu = sq->sq_cpu;
-		sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
-				 + sq_usage.ru_stime.tv_usec);
+		sq_total_time = (ts.tv_sec * 1000000
+				 + ts.tv_nsec / 1000);
 		sq_work_time = sq->work_time;
 	} else {
 		rcu_read_unlock();
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index a3f11349ce06..8705b0aa82e0 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -11,6 +11,7 @@
 #include <linux/audit.h>
 #include <linux/security.h>
 #include <linux/cpuset.h>
+#include <linux/sched/cputime.h>
 #include <linux/io_uring.h>
 
 #include <uapi/linux/io_uring.h>
@@ -169,6 +170,22 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
 	return READ_ONCE(sqd->state);
 }
 
+struct timespec64 io_sq_cpu_time(struct task_struct *tsk)
+{
+	u64 utime, stime;
+
+	task_cputime_adjusted(tsk, &utime, &stime);
+	return ns_to_timespec64(stime);
+}
+
+static void io_sq_update_worktime(struct io_sq_data *sqd, struct timespec64 start)
+{
+	struct timespec64 ts;
+
+	ts = timespec64_sub(io_sq_cpu_time(current), start);
+	sqd->work_time += ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
+}
+
 static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 {
 	unsigned int to_submit;
@@ -255,23 +272,12 @@ static bool io_sq_tw_pending(struct llist_node *retry_list)
 	return retry_list || !llist_empty(&tctx->task_list);
 }
 
-static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
-{
-	struct rusage end;
-
-	getrusage(current, RUSAGE_SELF, &end);
-	end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
-	end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
-
-	sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
-}
-
 static int io_sq_thread(void *data)
 {
 	struct llist_node *retry_list = NULL;
 	struct io_sq_data *sqd = data;
 	struct io_ring_ctx *ctx;
-	struct rusage start;
+	struct timespec64 start;
 	unsigned long timeout = 0;
 	char buf[TASK_COMM_LEN] = {};
 	DEFINE_WAIT(wait);
@@ -317,7 +323,7 @@ static int io_sq_thread(void *data)
 		}
 
 		cap_entries = !list_is_singular(&sqd->ctx_list);
-		getrusage(current, RUSAGE_SELF, &start);
+		start = io_sq_cpu_time(current);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
 			int ret = __io_sq_thread(ctx, cap_entries);
 
@@ -333,7 +339,7 @@ static int io_sq_thread(void *data)
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
 			if (sqt_spin) {
-				io_sq_update_worktime(sqd, &start);
+				io_sq_update_worktime(sqd, start);
 				timeout = jiffies + sqd->sq_thread_idle;
 			}
 			if (unlikely(need_resched())) {
diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
index b83dcdec9765..84ed2b312e88 100644
--- a/io_uring/sqpoll.h
+++ b/io_uring/sqpoll.h
@@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd);
 void io_put_sq_data(struct io_sq_data *sqd);
 void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
 int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
+struct timespec64 io_sq_cpu_time(struct task_struct *tsk);
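For anyone double-checking the units: both the old and the new expression report microseconds. With made-up numbers purely for illustration, an stime of 2 s + 500 us was reported as 2 * 1000000 + 500 = 2000500 via ru_stime, and the same duration as a timespec64 of { .tv_sec = 2, .tv_nsec = 500000 } comes out as 2 * 1000000 + 500000 / 1000 = 2000500 with the new code.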
Jens Axboe <axboe@kernel.dk> writes:
> getrusage() does a lot more than what the SQPOLL accounting needs; the
> latter only cares about (and uses) the stime. Rather than do a full
> RUSAGE_SELF summation, just query the used stime instead.
Hi Jens,
Patch looks good. I'd just mention you are converting ns to timespec64, just to convert it back to microseconds when writing to sqd->work_time and sq_total_time. I think wraparound is not a concern for task_cputime_adjusted, since this is the actual system cputime of a single thread inside a u64. So io_sq_cpu_time() could just return microseconds directly, and io_sq_update_worktime() would be trivial:

	sqd->work_time += io_sq_cpu_time(current) - start;
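Something like the following, untested and just to illustrate the idea; it assumes io_sq_cpu_time() is changed to return the stime in microseconds as a u64, with the fdinfo.c caller adjusted to match:

u64 io_sq_cpu_time(struct task_struct *tsk)
{
	u64 utime, stime;

	task_cputime_adjusted(tsk, &utime, &stime);
	/* stime is in nanoseconds; report microseconds directly */
	return stime / NSEC_PER_USEC;
}

static void io_sq_update_worktime(struct io_sq_data *sqd, u64 start)
{
	/* both values are already microseconds, no timespec64 round trip */
	sqd->work_time += io_sq_cpu_time(current) - start;
}

The fdinfo side would then use the returned value for sq_total_time as-is.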
Regardless:
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Thanks,
On 10/21/25 5:35 PM, Gabriel Krisman Bertazi wrote:
> Hi Jens,
>
> Patch looks good. I'd just mention you are converting ns to timespec64,
> just to convert it back to microseconds when writing to sqd->work_time
> and sq_total_time. I think wraparound is not a concern for
> task_cputime_adjusted, since this is the actual system cputime of a
> single thread inside a u64. So io_sq_cpu_time() could just return
> microseconds directly, and io_sq_update_worktime() would be trivial:
>
>	sqd->work_time += io_sq_cpu_time(current) - start;
That's a good point - I'll update both patches, folding an incremental like the below in. Thanks!