From: Frederic Weisbecker <frederic@kernel.org>
[ Upstream commit e7f2be115f0746b969c0df14c0d182f65f005ca5 ]
getrusage(RUSAGE_THREAD) with nohz_full may return shorter utime/stime than the actual time.
task_cputime_adjusted() snapshots utime and stime and then adjusts their sum to match the scheduler-maintained cputime.sum_exec_runtime. Unfortunately, in nohz_full, sum_exec_runtime is only updated once per second in the worst case, causing a discrepancy against utime and stime, which can be updated at any time by the reader using vtime.
To fix this situation, perform an update of cputime.sum_exec_runtime when the cputime snapshot reports the task as actually running while the tick is disabled. The related overhead is then contained within the relevant situations.
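For illustration only (not part of the patch), here is a minimal userspace sketch that can make the discrepancy visible: pin a busy thread to a nohz_full CPU and compare getrusage(RUSAGE_THREAD) against CLOCK_THREAD_CPUTIME_ID, which goes through task_sched_runtime() and therefore does not suffer the same lag. The CPU number and the spin count are arbitrary assumptions.

/* repro.c - sketch only; build with: gcc -O2 repro.c -o repro */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <time.h>
#include <sys/resource.h>
#include <sys/time.h>

static unsigned long long tv_us(struct timeval tv)
{
	return tv.tv_sec * 1000000ULL + tv.tv_usec;
}

int main(void)
{
	cpu_set_t set;

	/* Assumption: CPU 1 is in the nohz_full= boot parameter set. */
	CPU_ZERO(&set);
	CPU_SET(1, &set);
	sched_setaffinity(0, sizeof(set), &set);

	for (;;) {
		struct rusage ru;
		struct timespec ts;

		/* Spin so the thread stays busy and the tick stays off. */
		for (volatile unsigned long i = 0; i < 500000000UL; i++)
			;

		getrusage(RUSAGE_THREAD, &ru);
		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);

		/* Before the fix, the rusage sum can lag by up to a second. */
		printf("rusage %llu us vs thread cputime %llu us\n",
		       tv_us(ru.ru_utime) + tv_us(ru.ru_stime),
		       ts.tv_sec * 1000000ULL + ts.tv_nsec / 1000);
	}
}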
Reported-by: Hasegawa Hitomi <hasegawa-hitomi@fujitsu.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Hasegawa Hitomi <hasegawa-hitomi@fujitsu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Acked-by: Phil Auld <pauld@redhat.com>
Link: https://lore.kernel.org/r/20211026141055.57358-3-frederic@kernel.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/linux/sched/cputime.h |  5 +++--
 kernel/sched/cputime.c        | 12 +++++++++---
 2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 6c9f19a33865a..ce3c58286062c 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -18,15 +18,16 @@
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern void task_cputime(struct task_struct *t,
+extern bool task_cputime(struct task_struct *t,
			 u64 *utime, u64 *stime);
 extern u64 task_gtime(struct task_struct *t);
 #else
-static inline void task_cputime(struct task_struct *t,
+static inline bool task_cputime(struct task_struct *t,
				u64 *utime, u64 *stime)
 {
	*utime = t->utime;
	*stime = t->stime;
+	return false;
 }

 static inline u64 task_gtime(struct task_struct *t)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 46ed4e1383e21..a7ebf0d8e369f 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -666,7 +666,8 @@ void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
		.sum_exec_runtime = p->se.sum_exec_runtime,
	};

-	task_cputime(p, &cputime.utime, &cputime.stime);
+	if (task_cputime(p, &cputime.utime, &cputime.stime))
+		cputime.sum_exec_runtime = task_sched_runtime(p);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 EXPORT_SYMBOL_GPL(task_cputime_adjusted);
@@ -859,19 +860,21 @@ u64 task_gtime(struct task_struct *t)
  * add up the pending nohz execution time since the last
  * cputime snapshot.
  */
-void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
+bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 {
	struct vtime *vtime = &t->vtime;
	unsigned int seq;
	u64 delta;
+	int ret;

	if (!vtime_accounting_enabled()) {
		*utime = t->utime;
		*stime = t->stime;
-		return;
+		return false;
	}

	do {
+		ret = false;
		seq = read_seqcount_begin(&vtime->seqcount);

		*utime = t->utime;
@@ -881,6 +884,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
		if (vtime->state == VTIME_INACTIVE || is_idle_task(t))
			continue;

+		ret = true;
		delta = vtime_delta(vtime);

		/*
@@ -892,5 +896,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
	else if (vtime->state == VTIME_SYS)
		*stime += vtime->stime + delta;
	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return ret;
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
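A side note on the retry loop above: task_cputime() uses the kernel's seqcount reader idiom, where the snapshot is retried whenever a writer raced with it. A simplified, self-contained userspace sketch of the same pattern (C11 atomics, not the kernel's seqcount_t API):

#include <stdatomic.h>

/* Writers make the counter odd while updating, even again when done. */
struct seqcount { atomic_uint seq; };
struct sample { unsigned long utime, stime; };

static unsigned read_begin(struct seqcount *s)
{
	unsigned seq;

	/* Wait until no writer is in progress (count is even). */
	while ((seq = atomic_load_explicit(&s->seq,
					   memory_order_acquire)) & 1)
		;
	return seq;
}

static int read_retry(struct seqcount *s, unsigned seq)
{
	/* Order the data reads above before re-reading the counter. */
	atomic_thread_fence(memory_order_acquire);
	return atomic_load_explicit(&s->seq, memory_order_relaxed) != seq;
}

/* Reader, mirroring the shape of task_cputime(): */
static struct sample snapshot(struct seqcount *sc, struct sample *shared)
{
	struct sample ret;
	unsigned seq;

	do {
		seq = read_begin(sc);
		ret = *shared;	/* may be torn; the retry catches it */
	} while (read_retry(sc, seq));

	return ret;
}

A writer would increment seq (making it odd) with release ordering, update the shared data, then increment it again, which is why a torn read always shows up as a changed counter.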
From: Vitaly Kuznetsov <vkuznets@redhat.com>
[ Upstream commit 908fa88e420f30dde6d80f092795a18ec72ca6d3 ]
With the elevated 'KVM_CAP_MAX_VCPUS' value, the kvm_create_max_vcpus test may hit RLIMIT_NOFILE limits:
# ./kvm_create_max_vcpus
KVM_CAP_MAX_VCPU_ID: 4096
KVM_CAP_MAX_VCPUS: 1024
Testing creating 1024 vCPUs, with IDs 0...1023.
/dev/kvm not available (errno: 24), skipping test
Adjust the RLIMIT_NOFILE limits to make sure KVM_CAP_MAX_VCPUS file descriptors can be opened. Note, raising the hard limit ('rlim_max') requires the CAP_SYS_RESOURCE capability, which is generally not needed to run kvm selftests (but without raising the limit the test is doomed to fail anyway).
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20211123135953.667434-1-vkuznets@redhat.com>
[Skip the test if the hard limit can't be raised. - Paolo]
Reviewed-by: Sean Christopherson <seanjc@google.com>
Tested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 .../selftests/kvm/kvm_create_max_vcpus.c     | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 231d79e57774e..cfe75536d8a55 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -12,6 +12,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/resource.h>

 #include "test_util.h"

@@ -43,10 +44,39 @@ int main(int argc, char *argv[])
 {
	int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
	int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+	/*
+	 * Number of file descriptors required, KVM_CAP_MAX_VCPUS for vCPU fds +
+	 * an arbitrary number for everything else.
+	 */
+	int nr_fds_wanted = kvm_max_vcpus + 100;
+	struct rlimit rl;

	printf("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
	printf("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);

+	/*
+	 * Check that we're allowed to open nr_fds_wanted file descriptors and
+	 * try raising the limits if needed.
+	 */
+	TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+	if (rl.rlim_cur < nr_fds_wanted) {
+		rl.rlim_cur = nr_fds_wanted;
+		if (rl.rlim_max < nr_fds_wanted) {
+			int old_rlim_max = rl.rlim_max;
+			rl.rlim_max = nr_fds_wanted;
+
+			int r = setrlimit(RLIMIT_NOFILE, &rl);
+			if (r < 0) {
+				printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
+				       old_rlim_max, nr_fds_wanted);
+				exit(KSFT_SKIP);
+			}
+		} else {
+			TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+		}
+	}
+
	/*
	 * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
	 * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID
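Distilled from the hunk above, the raise-or-skip pattern in isolation, for reuse in other fd-hungry tests. This is a standalone sketch: the helper name ensure_nofile() is hypothetical, KSFT_SKIP is redefined locally rather than taken from kselftest.h, and it simplifies the patch by skipping on any setrlimit() failure instead of distinguishing the soft- and hard-limit cases.

#include <stdio.h>
#include <stdlib.h>
#include <sys/resource.h>

#define KSFT_SKIP 4	/* kselftest "skip" exit code */

/* Raise the soft (and, if needed, hard) RLIMIT_NOFILE to 'wanted';
 * exit with KSFT_SKIP when the limits cannot be raised. */
static void ensure_nofile(rlim_t wanted)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_NOFILE, &rl))
		exit(1);

	if (rl.rlim_cur >= wanted)
		return;

	rl.rlim_cur = wanted;
	if (rl.rlim_max < wanted)
		rl.rlim_max = wanted;	/* needs CAP_SYS_RESOURCE */

	if (setrlimit(RLIMIT_NOFILE, &rl)) {
		printf("RLIMIT_NOFILE is too low (wanted %llu), skipping\n",
		       (unsigned long long)wanted);
		exit(KSFT_SKIP);
	}
}

Calling ensure_nofile(kvm_max_vcpus + 100) before the vCPU creation loop would mirror the behavior the patch adds, including the 100-fd headroom for everything that isn't a vCPU fd.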
On 12/13/21 15:20, Sasha Levin wrote:
> From: Vitaly Kuznetsov <vkuznets@redhat.com>
>
> [ Upstream commit 908fa88e420f30dde6d80f092795a18ec72ca6d3 ]
>
> [...]
Acked-by: Paolo Bonzini <pbonzini@redhat.com>