Because DAMON sleeps in uninterruptible mode, /proc/loadavg reports fake load while DAMON is turned on, even though it is doing nothing. This can confuse users[1]. To avoid this, this commit makes DAMON sleep in idle mode.
[1] https://lore.kernel.org/all/11868371.O9o76ZdvQC@natalenko.name/
Fixes: 2224d8485492 ("mm: introduce Data Access MONitor (DAMON)")
Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: stable@vger.kernel.org # 5.15.x
---
I think this needs to be applied to v5.15.y, but it cannot be cleanly applied there as is. I will back-port this to v5.15.y and post it later, once this is merged in the mainline.
mm/damon/core.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/mm/damon/core.c b/mm/damon/core.c index daacd9536c7c..7813f47aadc9 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -12,6 +12,8 @@ #include <linux/kthread.h> #include <linux/mm.h> #include <linux/random.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/slab.h> #include <linux/string.h>
@@ -976,12 +978,25 @@ static unsigned long damos_wmark_wait_us(struct damos *scheme) return 0; }
+/* sleep for @usecs in idle mode */ +static void __sched damon_usleep_idle(unsigned long usecs) +{ + ktime_t exp = ktime_add_us(ktime_get(), usecs); + u64 delta = usecs * NSEC_PER_USEC / 100; /* allow 1% error */ + + for (;;) { + __set_current_state(TASK_IDLE); + if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) + break; + } +} + static void kdamond_usleep(unsigned long usecs) { if (usecs > 100 * 1000) - schedule_timeout_interruptible(usecs_to_jiffies(usecs)); + schedule_timeout_idle(usecs_to_jiffies(usecs)); else - usleep_range(usecs, usecs + 1); + damon_usleep_idle(usecs); }
/* Returns negative error code if it's not activated but should return */ @@ -1036,7 +1051,7 @@ static int kdamond_fn(void *data) ctx->callback.after_sampling(ctx)) done = true;
- usleep_range(ctx->sample_interval, ctx->sample_interval + 1); + kdamond_usleep(ctx->sample_interval);
if (ctx->primitive.check_accesses) max_nr_accesses = ctx->primitive.check_accesses(ctx);