From: Thomas Gleixner tglx@linutronix.de
[ Upstream commit a7fed5c0431dbfa707037848830f980e0f93cfb3 ]
register_nmi_handler() has no sanity check whether a handler has been registered already. Such an unintended double-add leads to list corruption and hard to diagnose problems during the next NMI handling.
Init the list head in the static NMI action struct and check it for being empty in register_nmi_handler().
[ bp: Fixups. ]
Reported-by: Sean Christopherson seanjc@google.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Borislav Petkov bp@suse.de Link: https://lore.kernel.org/lkml/20220511234332.3654455-1-seanjc@google.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/include/asm/nmi.h | 1 + arch/x86/kernel/nmi.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 1cb9c17a4cb4..5c5f1e56c404 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -47,6 +47,7 @@ struct nmiaction { #define register_nmi_handler(t, fn, fg, n, init...) \ ({ \ static struct nmiaction init fn##_na = { \ + .list = LIST_HEAD_INIT(fn##_na.list), \ .handler = (fn), \ .name = (n), \ .flags = (fg), \ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 4bce802d25fb..399648421223 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -157,7 +157,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) struct nmi_desc *desc = nmi_to_desc(type); unsigned long flags;
- if (!action->handler) + if (WARN_ON_ONCE(!action->handler || !list_empty(&action->list))) return -EINVAL;
raw_spin_lock_irqsave(&desc->lock, flags); @@ -177,7 +177,7 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) list_add_rcu(&action->list, &desc->head); else list_add_tail_rcu(&action->list, &desc->head); - + raw_spin_unlock_irqrestore(&desc->lock, flags); return 0; } @@ -186,7 +186,7 @@ EXPORT_SYMBOL(__register_nmi_handler); void unregister_nmi_handler(unsigned int type, const char *name) { struct nmi_desc *desc = nmi_to_desc(type); - struct nmiaction *n; + struct nmiaction *n, *found = NULL; unsigned long flags;
raw_spin_lock_irqsave(&desc->lock, flags); @@ -200,12 +200,16 @@ void unregister_nmi_handler(unsigned int type, const char *name) WARN(in_nmi(), "Trying to free NMI (%s) from NMI context!\n", n->name); list_del_rcu(&n->list); + found = n; break; } }
raw_spin_unlock_irqrestore(&desc->lock, flags); - synchronize_rcu(); + if (found) { + synchronize_rcu(); + INIT_LIST_HEAD(&found->list); + } } EXPORT_SYMBOL_GPL(unregister_nmi_handler);
From: Sebastian Andrzej Siewior bigeasy@linutronix.de
[ Upstream commit 21673fcb2532dcd189905ff5a5389eb7dcd0e57a ]
The IRQ simulator uses irq_work to trigger an interrupt. Without the IRQ_WORK_HARD_IRQ flag the irq_work will be performed in thread context on PREEMPT_RT. This causes locking errors later in handle_simple_irq() which expects to be invoked with disabled interrupts.
Triggering individual interrupts in hardirq context should not lead to unexpected high latencies since this is also what the hardware controller does. Also it is used as a simulator so...
Use IRQ_WORK_INIT_HARD() to carry out the irq_work in hardirq context on PREEMPT_RT.
Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de Signed-off-by: Thomas Gleixner tglx@linutronix.de Link: https://lore.kernel.org/r/YnuZBoEVMGwKkLm+@linutronix.de Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/irq/irq_sim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 0cd02efa3a74..dd76323ea3fd 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -181,7 +181,7 @@ struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode, goto err_free_bitmap;
work_ctx->irq_count = num_irqs; - init_irq_work(&work_ctx->work, irq_sim_handle_irq); + work_ctx->work = IRQ_WORK_INIT_HARD(irq_sim_handle_irq);
return work_ctx->domain;
From: "Maciej W. Rozycki" macro@orcam.me.uk
[ Upstream commit 92067440f1311dfa4d77b57a9da6b3706f5da32e ]
The frequency reported for clock sources are rounded down, which gives misleading figures, e.g.:
I/O ASIC clock frequency 24999480Hz sched_clock: 32 bits at 24MHz, resolution 40ns, wraps every 85901132779ns MIPS counter frequency 59998512Hz sched_clock: 32 bits at 59MHz, resolution 16ns, wraps every 35792281591ns
Rounding to nearest is more adequate:
I/O ASIC clock frequency 24999664Hz sched_clock: 32 bits at 25MHz, resolution 40ns, wraps every 85900499947ns MIPS counter frequency 59999728Hz sched_clock: 32 bits at 60MHz, resolution 16ns, wraps every 35791556599ns
Signed-off-by: Maciej W. Rozycki macro@orcam.me.uk Signed-off-by: Thomas Gleixner tglx@linutronix.de Acked-by: John Stultz jstultz@google.com Link: https://lore.kernel.org/r/alpine.DEB.2.21.2204240055590.9383@angie.orcam.me.... Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/time/sched_clock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index b1b9b12899f5..ee07f3ac1e5b 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -8,6 +8,7 @@ #include <linux/jiffies.h> #include <linux/ktime.h> #include <linux/kernel.h> +#include <linux/math.h> #include <linux/moduleparam.h> #include <linux/sched.h> #include <linux/sched/clock.h> @@ -199,11 +200,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
r = rate; if (r >= 4000000) { - r /= 1000000; + r = DIV_ROUND_CLOSEST(r, 1000000); r_unit = 'M'; } else { if (r >= 1000) { - r /= 1000; + r = DIV_ROUND_CLOSEST(r, 1000); r_unit = 'k'; } else { r_unit = ' ';
From: Sebastian Andrzej Siewior bigeasy@linutronix.de
[ Upstream commit 75d8cce128c516fe6cf4b8683e8fe1a59e919902 ]
irq_poll_cpu_dead() pulls the blk_cpu_iopoll backlog from the dead CPU and raises the POLL softirq with __raise_softirq_irqoff() on the CPU it is running on. That just sets the bit in the pending softirq mask.
This means the handling of the softirq is delayed until the next interrupt or a local_bh_disable/enable() pair. As a consequence the CPU on which this code runs can reach idle with the POLL softirq pending, which triggers a warning in the NOHZ idle code.
Add a local_bh_disable/enable() pair around the interrupts disabled section in irq_poll_cpu_dead(). local_bh_enable will handle the pending softirq.
[tglx: Massaged changelog and comment]
Signed-off-by: Sebastian Andrzej Siewior bigeasy@linutronix.de Signed-off-by: Thomas Gleixner tglx@linutronix.de Link: https://lore.kernel.org/r/87k0bxgl27.ffs@tglx Signed-off-by: Sasha Levin sashal@kernel.org --- lib/irq_poll.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 2f17b488d58e..2d5329a42105 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -188,14 +188,18 @@ EXPORT_SYMBOL(irq_poll_init); static int irq_poll_cpu_dead(unsigned int cpu) { /* - * If a CPU goes away, splice its entries to the current CPU - * and trigger a run of the softirq + * If a CPU goes away, splice its entries to the current CPU and + * set the POLL softirq bit. The local_bh_disable()/enable() pair + * ensures that it is handled. Otherwise the current CPU could + * reach idle with the POLL softirq pending. */ + local_bh_disable(); local_irq_disable(); list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), this_cpu_ptr(&blk_cpu_iopoll)); __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_enable(); + local_bh_enable();
return 0; }
From: Marc Zyngier maz@kernel.org
[ Upstream commit d802057c7c553ad426520a053da9f9fe08e2c35a ]
When booting with maxcpus=<small number>, interrupt controllers such as the GICv3 ITS may not be able to satisfy the affinity of some managed interrupts, as some of the HW resources are simply not available.
The same thing happens when loading a driver using managed interrupts while CPUs are offline.
In order to deal with this, do not try to activate such interrupt if there is no online CPU capable of handling it. Instead, place it in shutdown state. Once a capable CPU shows up, it will be activated.
Reported-by: John Garry john.garry@huawei.com Reported-by: David Decotigny ddecotig@google.com Signed-off-by: Marc Zyngier maz@kernel.org Signed-off-by: Thomas Gleixner tglx@linutronix.de Tested-by: John Garry john.garry@huawei.com Link: https://lore.kernel.org/r/20220405185040.206297-2-maz@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/irq/msi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 2bdfce5edafd..a9ee535293eb 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -818,6 +818,21 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag irqd_clr_can_reserve(irqd); if (vflags & VIRQ_NOMASK_QUIRK) irqd_set_msi_nomask_quirk(irqd); + + /* + * If the interrupt is managed but no CPU is available to + * service it, shut it down until better times. Note that + * we only do this on the !RESERVE path as x86 (the only + * architecture using this flag) deals with this in a + * different way by using a catch-all vector. + */ + if ((vflags & VIRQ_ACTIVATE) && + irqd_affinity_is_managed(irqd) && + !cpumask_intersects(irq_data_get_affinity_mask(irqd), + cpu_online_mask)) { + irqd_set_managed_shutdown(irqd); + return 0; + } }
if (!(vflags & VIRQ_ACTIVATE))
From: "Maciej W. Rozycki" macro@orcam.me.uk
[ Upstream commit 5d64089aa4a5bd3d7e00e3d6ddf4943dd34627b3 ]
Verify that the PCI IRQ Routing Table header as well as individual slot entries are all wholly contained within the BIOS memory area. Do not even call the checksum calculator if the header would overrun the area and then bail out early if any slot would.
Signed-off-by: Maciej W. Rozycki macro@orcam.me.uk Signed-off-by: Thomas Gleixner tglx@linutronix.de Link: https://lore.kernel.org/r/alpine.DEB.2.21.2203301735510.22465@angie.orcam.me... Signed-off-by: Sasha Levin sashal@kernel.org --- arch/x86/pci/irq.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 97b63e35e152..13513003303e 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -68,7 +68,8 @@ void (*pcibios_disable_irq)(struct pci_dev *dev) = pirq_disable_irq; * and perform checksum verification. */
-static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr) +static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr, + u8 *limit) { struct irq_routing_table *rt; int i; @@ -78,7 +79,8 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr) if (rt->signature != PIRQ_SIGNATURE || rt->version != PIRQ_VERSION || rt->size % 16 || - rt->size < sizeof(struct irq_routing_table)) + rt->size < sizeof(struct irq_routing_table) || + (limit && rt->size > limit - addr)) return NULL; sum = 0; for (i = 0; i < rt->size; i++) @@ -99,17 +101,22 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
static struct irq_routing_table * __init pirq_find_routing_table(void) { + u8 * const bios_start = (u8 *)__va(0xf0000); + u8 * const bios_end = (u8 *)__va(0x100000); u8 *addr; struct irq_routing_table *rt;
if (pirq_table_addr) { - rt = pirq_check_routing_table((u8 *) __va(pirq_table_addr)); + rt = pirq_check_routing_table((u8 *)__va(pirq_table_addr), + NULL); if (rt) return rt; printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n"); } - for (addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { - rt = pirq_check_routing_table(addr); + for (addr = bios_start; + addr < bios_end - sizeof(struct irq_routing_table); + addr += 16) { + rt = pirq_check_routing_table(addr, bios_end); if (rt) return rt; }
linux-stable-mirror@lists.linaro.org