On Tue, Dec 10, 2024 at 9:44 PM Sasha Levin <sashal(a)kernel.org> wrote:
>
> This is a note to let you know that I've just added the patch titled
>
> rtla/utils: Add idle state disabling via libcpupower
>
> to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> rtla-utils-add-idle-state-disabling-via-libcpupower.patch
> and it can be found in the queue-6.12 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
This is part of a patchset implementing a new feature, rtla idle
state disabling; see [1]. It seems it was included in this stable
queue, and also in the one for 6.6, by mistake.
Also, the patch does nothing by itself: it depends on preceding
commits from the patchset to enable HAVE_LIBCPUPOWER_SUPPORT and on
following commits to actually wire the functionality into the rtla
command line interface.
Perhaps AUTOSEL picked it up due to merge conflicts with other
patches? I don't know of any, though.
[1] - https://lore.kernel.org/linux-trace-kernel/20241017140914.3200454-1-tglozar…
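For anyone wanting context, the intended call sequence in the later
patches of the set looks roughly like the sketch below. Only the four
helpers come from this patch; the wrapper function and its parameters
are illustrative.

	/* Sketch of the expected usage: limit_idle_states() and its
	 * arguments are hypothetical, the four helpers are real. */
	#include "utils.h"

	static int limit_idle_states(int nr_cpus, unsigned int deepest)
	{
		int cpu;

		if (!have_libcpupower_support())
			return -1;	/* rtla built without libcpupower */

		for (cpu = 0; cpu < nr_cpus; cpu++) {
			if (save_cpu_idle_disable_state(cpu) < 0)
				goto err;
			if (set_deepest_cpu_idle_state(cpu, deepest) < 0)
				goto err;
		}
		return 0;	/* call restore_cpu_idle_disable_state() per cpu when done */
	err:
		free_cpu_idle_disable_states();
		return -1;
	}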
>
>
>
> commit 354bcd3b3efd600f4d23cee6898a6968659bb3a9
> Author: Tomas Glozar <tglozar(a)redhat.com>
> Date: Thu Oct 17 16:09:11 2024 +0200
>
> rtla/utils: Add idle state disabling via libcpupower
>
> [ Upstream commit 083d29d3784319e9e9fab3ac02683a7b26ae3480 ]
>
> Add functions to utils.c to disable idle states through functions of
> libcpupower. This will serve as the basis for disabling idle states
> per cpu when running timerlat.
>
> Link: https://lore.kernel.org/20241017140914.3200454-4-tglozar@redhat.com
> Signed-off-by: Tomas Glozar <tglozar(a)redhat.com>
> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
> index 0735fcb827ed7..230f9fc7502dd 100644
> --- a/tools/tracing/rtla/src/utils.c
> +++ b/tools/tracing/rtla/src/utils.c
> @@ -4,6 +4,9 @@
> */
>
> #define _GNU_SOURCE
> +#ifdef HAVE_LIBCPUPOWER_SUPPORT
> +#include <cpuidle.h>
> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */
> #include <dirent.h>
> #include <stdarg.h>
> #include <stdlib.h>
> @@ -519,6 +522,153 @@ int set_cpu_dma_latency(int32_t latency)
> return fd;
> }
>
> +#ifdef HAVE_LIBCPUPOWER_SUPPORT
> +static unsigned int **saved_cpu_idle_disable_state;
> +static size_t saved_cpu_idle_disable_state_alloc_ctr;
> +
> +/*
> + * save_cpu_idle_disable_state - save disable for all idle states of a cpu
> + *
> + * Saves the current disable of all idle states of a cpu, to be subsequently
> + * restored via restore_cpu_idle_disable_state.
> + *
> + * Return: idle state count on success, negative on error
> + */
> +int save_cpu_idle_disable_state(unsigned int cpu)
> +{
> + unsigned int nr_states;
> + unsigned int state;
> + int disabled;
> + int nr_cpus;
> +
> + nr_states = cpuidle_state_count(cpu);
> +
> + if (nr_states == 0)
> + return 0;
> +
> + if (saved_cpu_idle_disable_state == NULL) {
> + nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
> + saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
> + if (!saved_cpu_idle_disable_state)
> + return -1;
> + }
> +
> + saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
> + if (!saved_cpu_idle_disable_state[cpu])
> + return -1;
> + saved_cpu_idle_disable_state_alloc_ctr++;
> +
> + for (state = 0; state < nr_states; state++) {
> + disabled = cpuidle_is_state_disabled(cpu, state);
> + if (disabled < 0)
> + return disabled;
> + saved_cpu_idle_disable_state[cpu][state] = disabled;
> + }
> +
> + return nr_states;
> +}
> +
> +/*
> + * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
> + *
> + * Restores the current disable state of all idle states of a cpu that was
> + * previously saved by save_cpu_idle_disable_state.
> + *
> + * Return: idle state count on success, negative on error
> + */
> +int restore_cpu_idle_disable_state(unsigned int cpu)
> +{
> + unsigned int nr_states;
> + unsigned int state;
> + int disabled;
> + int result;
> +
> + nr_states = cpuidle_state_count(cpu);
> +
> + if (nr_states == 0)
> + return 0;
> +
> + if (!saved_cpu_idle_disable_state)
> + return -1;
> +
> + for (state = 0; state < nr_states; state++) {
> + if (!saved_cpu_idle_disable_state[cpu])
> + return -1;
> + disabled = saved_cpu_idle_disable_state[cpu][state];
> + result = cpuidle_state_disable(cpu, state, disabled);
> + if (result < 0)
> + return result;
> + }
> +
> + free(saved_cpu_idle_disable_state[cpu]);
> + saved_cpu_idle_disable_state[cpu] = NULL;
> + saved_cpu_idle_disable_state_alloc_ctr--;
> + if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
> + free(saved_cpu_idle_disable_state);
> + saved_cpu_idle_disable_state = NULL;
> + }
> +
> + return nr_states;
> +}
> +
> +/*
> + * free_cpu_idle_disable_states - free saved idle state disable for all cpus
> + *
> + * Frees the memory used for storing cpu idle state disable for all cpus
> + * and states.
> + *
> + * Normally, the memory is freed automatically in
> + * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
> + * error.
> + */
> +void free_cpu_idle_disable_states(void)
> +{
> + int cpu;
> + int nr_cpus;
> +
> + if (!saved_cpu_idle_disable_state)
> + return;
> +
> + nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
> +
> + for (cpu = 0; cpu < nr_cpus; cpu++) {
> + free(saved_cpu_idle_disable_state[cpu]);
> + saved_cpu_idle_disable_state[cpu] = NULL;
> + }
> +
> + free(saved_cpu_idle_disable_state);
> + saved_cpu_idle_disable_state = NULL;
> +}
> +
> +/*
> + * set_deepest_cpu_idle_state - limit idle state of cpu
> + *
> + * Disables all idle states deeper than the one given in
> + * deepest_state (assuming states with higher number are deeper).
> + *
> + * This is used to reduce the exit from idle latency. Unlike
> + * set_cpu_dma_latency, it can disable idle states per cpu.
> + *
> + * Return: idle state count on success, negative on error
> + */
> +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
> +{
> + unsigned int nr_states;
> + unsigned int state;
> + int result;
> +
> + nr_states = cpuidle_state_count(cpu);
> +
> + for (state = deepest_state + 1; state < nr_states; state++) {
> + result = cpuidle_state_disable(cpu, state, 1);
> + if (result < 0)
> + return result;
> + }
> +
> + return nr_states;
> +}
> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */
> +
> #define _STR(x) #x
> #define STR(x) _STR(x)
>
> diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h
> index 99c9cf81bcd02..101d4799a0090 100644
> --- a/tools/tracing/rtla/src/utils.h
> +++ b/tools/tracing/rtla/src/utils.h
> @@ -66,6 +66,19 @@ int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr);
> int set_comm_cgroup(const char *comm_prefix, const char *cgroup);
> int set_pid_cgroup(pid_t pid, const char *cgroup);
> int set_cpu_dma_latency(int32_t latency);
> +#ifdef HAVE_LIBCPUPOWER_SUPPORT
> +int save_cpu_idle_disable_state(unsigned int cpu);
> +int restore_cpu_idle_disable_state(unsigned int cpu);
> +void free_cpu_idle_disable_states(void);
> +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state);
> +static inline int have_libcpupower_support(void) { return 1; }
> +#else
> +static inline int save_cpu_idle_disable_state(unsigned int cpu) { return -1; }
> +static inline int restore_cpu_idle_disable_state(unsigned int cpu) { return -1; }
> +static inline void free_cpu_idle_disable_states(void) { }
> +static inline int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state) { return -1; }
> +static inline int have_libcpupower_support(void) { return 0; }
> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */
> int auto_house_keeping(cpu_set_t *monitored_cpus);
>
> #define ns_to_usf(x) (((double)x/1000))
>
Tomas
After upgrading from 6.6.52 to 6.6.58, tapping on the touchpad stopped
working. The problem is still present in 6.6.59.
I see the following in dmesg output; the first line was not there
previously:
[ 2.129282] hid-multitouch 0018:27C6:01E0.0001: The byte is not expected for fixing the report descriptor. It's possible that the touchpad firmware is not suitable for applying the fix. got: 9
[ 2.137479] input: GXTP5140:00 27C6:01E0 as /devices/platform/AMDI0010:00/i2c-0/i2c-GXTP5140:00/0018:27C6:01E0.0001/input/input10
[ 2.137680] input: GXTP5140:00 27C6:01E0 as /devices/platform/AMDI0010:00/i2c-0/i2c-GXTP5140:00/0018:27C6:01E0.0001/input/input11
[ 2.137921] hid-multitouch 0018:27C6:01E0.0001: input,hidraw0: I2C HID v1.00 Mouse [GXTP5140:00 27C6:01E0] on i2c-GXTP5140:00
Hardware is a Lenovo ThinkPad L15 Gen 4.
The problem goes away when reverting this commit:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/d…
See also Gentoo bug report: https://bugs.gentoo.org/942797
Hi,
Can you add the below to 6.1-stable? Thanks!
commit 3181e22fb79910c7071e84a43af93ac89e8a7106
Author: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Mon Jan 9 14:46:10 2023 +0000
io_uring: wake up optimisations
Commit 3181e22fb79910c7071e84a43af93ac89e8a7106 upstream.
Flushing completions is done either from the submit syscall or by
task_work; both run in the context of the submitter task, and for
single-threaded rings, as implied by ->task_complete, there won't be
any waiters on ->cq_wait but the master task. That means that there
can be no tasks sleeping on cq_wait while we run
__io_submit_flush_completions() and so waking up can be skipped.
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/60ad9768ec74435a0ddaa6eec0ffa7729474f69f.16732742…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
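For context, a minimal liburing sketch (not part of this patch) of the
ring configuration the optimisation targets: IORING_SETUP_DEFER_TASKRUN,
which requires IORING_SETUP_SINGLE_ISSUER, is what makes ->task_complete
hold, so only the submitter task can be waiting on ->cq_wait.

	#include <liburing.h>

	/* Create a single-issuer ring with deferred task_work; error
	 * handling trimmed for brevity. */
	static int make_deferred_ring(struct io_uring *ring)
	{
		return io_uring_queue_init(64, ring,
					   IORING_SETUP_SINGLE_ISSUER |
					   IORING_SETUP_DEFER_TASKRUN);
	}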
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4f0ae938b146..0b1361663267 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -582,6 +582,16 @@ static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
io_cqring_ev_posted(ctx);
}
+static inline void __io_cq_unlock_post_flush(struct io_ring_ctx *ctx)
+ __releases(ctx->completion_lock)
+{
+ io_commit_cqring(ctx);
+ spin_unlock(&ctx->completion_lock);
+ io_commit_cqring_flush(ctx);
+ if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
+ __io_cqring_wake(ctx);
+}
+
void io_cq_unlock_post(struct io_ring_ctx *ctx)
{
__io_cq_unlock_post(ctx);
@@ -1339,7 +1349,7 @@ static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
if (!(req->flags & REQ_F_CQE_SKIP))
__io_fill_cqe_req(ctx, req);
}
- __io_cq_unlock_post(ctx);
+ __io_cq_unlock_post_flush(ctx);
io_free_batch_list(ctx, state->compl_reqs.first);
INIT_WQ_LIST(&state->compl_reqs);
--
Jens Axboe
From: Ard Biesheuvel <ardb(a)kernel.org>
When the host stage1 is configured for LPA2, the value currently being
programmed into TCR_EL2.T0SZ may be invalid unless LPA2 is configured
at HYP as well. This means kvm_lpa2_is_enabled() is not the right
condition to test when setting TCR_EL2.DS, as it will return false if
LPA2 is only available for stage 1 but not for stage 2.
Similarly, programming TCR_EL2.PS based on a limited IPA range due to
lack of stage2 LPA2 support could potentially result in problems.
So use lpa2_is_enabled() instead, and set the PS field according to the
host's IPS, which is capped at 48 bits if LPA2 support is absent or
disabled. Whether or not we can make meaningful use of such a
configuration is a different question.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
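A user-space illustration (not kernel code) of the
FIELD_GET(TCR_IPS_MASK, tcr) step in the patch: TCR_EL1.IPS occupies
bits [34:32], and the 0b101 encoding means a 48-bit PA range, which is
what the host reports when LPA2 support is absent or disabled. The mask
is defined locally here for the example.

	#include <stdint.h>
	#include <stdio.h>

	#define TCR_IPS_SHIFT	32
	#define TCR_IPS_MASK	(UINT64_C(7) << TCR_IPS_SHIFT)	/* TCR_EL1.IPS */

	int main(void)
	{
		uint64_t tcr = UINT64_C(5) << TCR_IPS_SHIFT;	/* IPS = 0b101: 48-bit PA */
		uint64_t ips = (tcr & TCR_IPS_MASK) >> TCR_IPS_SHIFT;

		printf("IPS = %llu\n", (unsigned long long)ips);	/* prints 5 */
		return 0;
	}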
arch/arm64/kvm/arm.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a102c3aebdbc..7b2735ad32e9 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1990,8 +1990,7 @@ static int kvm_init_vector_slots(void)
static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
{
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
- u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
- unsigned long tcr;
+ unsigned long tcr, ips;
/*
* Calculate the raw per-cpu offset without a translation from the
@@ -2005,6 +2004,7 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
params->mair_el2 = read_sysreg(mair_el1);
tcr = read_sysreg(tcr_el1);
+ ips = FIELD_GET(TCR_IPS_MASK, tcr);
if (cpus_have_final_cap(ARM64_KVM_HVHE)) {
tcr |= TCR_EPD1_MASK;
} else {
@@ -2014,8 +2014,8 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
tcr &= ~TCR_T0SZ_MASK;
tcr |= TCR_T0SZ(hyp_va_bits);
tcr &= ~TCR_EL2_PS_MASK;
- tcr |= FIELD_PREP(TCR_EL2_PS_MASK, kvm_get_parange(mmfr0));
- if (kvm_lpa2_is_enabled())
+ tcr |= FIELD_PREP(TCR_EL2_PS_MASK, ips);
+ if (lpa2_is_enabled())
tcr |= TCR_EL2_DS;
params->tcr_el2 = tcr;
--
2.47.1.613.gc27f4b7a9f-goog
From: Ard Biesheuvel <ardb(a)kernel.org>
Currently, LPA2 kernel support implies support for up to 52 bits of
physical addressing, and this is reflected in global definitions such as
PHYS_MASK_SHIFT and MAX_PHYSMEM_BITS.
This is potentially problematic, given that LPA2 hardware support is
modeled as a CPU feature which can be overridden, and with LPA2 hardware
support turned off, attempting to map physical regions with address bits
[51:48] set (which may exist on LPA2 capable systems booting with
arm64.nolva) will result in corrupted mappings with a truncated output
address and bogus shareability attributes.
This means that the accepted physical address range in the mapping
routines should be at most 48 bits wide when LPA2 support is configured
but not enabled at runtime.
Fixes: 352b0395b505 ("arm64: Enable 52-bit virtual addressing for 4k and 16k granule configs")
Cc: <stable(a)vger.kernel.org>
Reviewed-by: Anshuman Khandual <anshuman.khandual(a)arm.com>
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
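A stand-alone illustration (not kernel code) of the corruption described
above, assuming the 4K LPA2 descriptor layout where OA[51:50] live in
descriptor bits [9:8] -- the same bits a non-LPA2 walker reads as the
shareability (SH) field:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t pa = UINT64_C(0xF) << 48;	/* PA with bits [51:48] set */
		/* Encode as an LPA2-style PTE: OA[49:12] in place, OA[51:50]
		 * moved into bits [9:8]. */
		uint64_t pte = (pa & UINT64_C(0x0003FFFFFFFFF000)) |
			       ((pa >> 42) & UINT64_C(0x300));

		/* Without LPA2, bits [9:8] are shareability and the OA stops
		 * at bit 47: truncated output address, bogus attributes. */
		printf("SH seen by a non-LPA2 walker: %llu\n",
		       (unsigned long long)((pte >> 8) & 3));	/* prints 3 */
		return 0;
	}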
arch/arm64/include/asm/pgtable-hwdef.h | 6 ------
arch/arm64/include/asm/pgtable-prot.h | 7 +++++++
arch/arm64/include/asm/sparsemem.h | 5 ++++-
3 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index c78a988cca93..a9136cc551cc 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -222,12 +222,6 @@
*/
#define S1_TABLE_AP (_AT(pmdval_t, 3) << 61)
-/*
- * Highest possible physical address supported.
- */
-#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
-#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
-
#define TTBR_CNP_BIT (UL(1) << 0)
/*
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 9f9cf13bbd95..a95f1f77bb39 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -81,6 +81,7 @@ extern unsigned long prot_ns_shared;
#define lpa2_is_enabled() false
#define PTE_MAYBE_SHARED PTE_SHARED
#define PMD_MAYBE_SHARED PMD_SECT_S
+#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
#else
static inline bool __pure lpa2_is_enabled(void)
{
@@ -89,8 +90,14 @@ static inline bool __pure lpa2_is_enabled(void)
#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED)
#define PMD_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PMD_SECT_S)
+#define PHYS_MASK_SHIFT (lpa2_is_enabled() ? CONFIG_ARM64_PA_BITS : 48)
#endif
+/*
+ * Highest possible physical address supported.
+ */
+#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
+
/*
* If we have userspace only BTI we don't want to mark kernel pages
* guarded even if the system does support BTI.
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index 8a8acc220371..84783efdc9d1 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -5,7 +5,10 @@
#ifndef __ASM_SPARSEMEM_H
#define __ASM_SPARSEMEM_H
-#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
+#include <asm/pgtable-prot.h>
+
+#define MAX_PHYSMEM_BITS PHYS_MASK_SHIFT
+#define MAX_POSSIBLE_PHYSMEM_BITS (52)
/*
* Section size must be at least 512MB for 64K base
--
2.47.1.613.gc27f4b7a9f-goog
The PE Reset State "0" obtained from the RTAS calls
ibm_read_slot_reset_[state|state2] indicates that the reset is
deactivated and the PE is not in the MMIO Stopped or DMA Stopped
state.
With PE Reset State "0", MMIO and DMA are allowed for the PE. The
function pseries_eeh_get_state() currently does not indicate that to
the caller, so drivers are unable to resume MMIO and DMA activity.
The patch fixes that by reflecting what is actually allowed.
Fixes: 00ba05a12b3c ("powerpc/pseries: Cleanup on pseries_eeh_get_state()")
Signed-off-by: Narayana Murty N <nnmlinux(a)linux.ibm.com>
---
Changelog:
V1: https://lore.kernel.org/all/20241107042027.338065-1-nnmlinux@linux.ibm.c…
 - added Fixes tag for "powerpc/pseries: Cleanup on
   pseries_eeh_get_state()".
---
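For illustration, a driver-visible view of the change; the flag values
below are local stand-ins for the example, not the real definitions
from arch/powerpc/include/asm/eeh.h:

	#include <stdio.h>

	#define EEH_STATE_MMIO_ACTIVE	(1 << 2)	/* stand-in values */
	#define EEH_STATE_DMA_ACTIVE	(1 << 3)
	#define EEH_STATE_MMIO_ENABLED	(1 << 4)
	#define EEH_STATE_DMA_ENABLED	(1 << 5)

	int main(void)
	{
		int before = EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE;
		int after = before | EEH_STATE_MMIO_ENABLED |
			    EEH_STATE_DMA_ENABLED;

		/* Recovery paths gate resumption on the ENABLED bits: */
		printf("MMIO resume allowed before fix: %s\n",
		       (before & EEH_STATE_MMIO_ENABLED) ? "yes" : "no");
		printf("MMIO resume allowed after fix:  %s\n",
		       (after & EEH_STATE_MMIO_ENABLED) ? "yes" : "no");
		return 0;
	}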
arch/powerpc/platforms/pseries/eeh_pseries.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 1893f66371fa..b12ef382fec7 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -580,8 +580,10 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
switch(rets[0]) {
case 0:
- result = EEH_STATE_MMIO_ACTIVE |
- EEH_STATE_DMA_ACTIVE;
+ result = EEH_STATE_MMIO_ACTIVE |
+ EEH_STATE_DMA_ACTIVE |
+ EEH_STATE_MMIO_ENABLED |
+ EEH_STATE_DMA_ENABLED;
break;
case 1:
result = EEH_STATE_RESET_ACTIVE |
--
2.47.1