Today it can happen that an event channel is being removed from the system while the event handling loop is active. This can lead to a race resulting in crashes or WARN() splats.
Fix this problem by using a rwlock taken as reader in the event handling loop and as writer when removing an event channel.
As the observed problem was a NULL dereference in evtchn_from_irq(), make this function more robust against races by testing that the irq_info pointer is not NULL before dereferencing it.
Cc: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Cc: stable@vger.kernel.org Reported-by: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Signed-off-by: Juergen Gross jgross@suse.com --- drivers/xen/events/events_base.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 3a791c8485d0..178a471906de 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <linux/irqnr.h> #include <linux/pci.h> +#include <linux/spinlock.h>
#ifdef CONFIG_X86 #include <asm/desc.h> @@ -70,6 +71,9 @@ const struct evtchn_ops *evtchn_ops; */ static DEFINE_MUTEX(irq_mapping_update_lock);
+/* Lock protecting event handling loop against removing event channels. */ +static DEFINE_RWLOCK(evtchn_rwlock); + static LIST_HEAD(xen_irq_list_head);
/* IRQ <-> VIRQ mapping. */ @@ -247,10 +251,14 @@ static void xen_irq_info_cleanup(struct irq_info *info) */ evtchn_port_t evtchn_from_irq(unsigned irq) { - if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)) + struct irq_info *info = NULL; + + if (likely(irq < nr_irqs)) + info = info_for_irq(irq); + if (WARN(!info, "Invalid irq %d!\n", irq)) return 0;
- return info_for_irq(irq)->evtchn; + return info->evtchn; }
unsigned int irq_from_evtchn(evtchn_port_t evtchn) @@ -603,6 +611,7 @@ static void __unbind_from_irq(unsigned int irq) { evtchn_port_t evtchn = evtchn_from_irq(irq); struct irq_info *info = irq_get_handler_data(irq); + unsigned long flags;
if (info->refcnt > 0) { info->refcnt--; @@ -610,8 +619,10 @@ static void __unbind_from_irq(unsigned int irq) return; }
+ write_lock_irqsave(&evtchn_rwlock, flags); + if (VALID_EVTCHN(evtchn)) { - unsigned int cpu = cpu_from_irq(irq); + unsigned int cpu = cpu_from_irq(irq);;
xen_evtchn_close(evtchn);
@@ -629,6 +640,8 @@ static void __unbind_from_irq(unsigned int irq) xen_irq_info_cleanup(info); }
+ write_unlock_irqrestore(&evtchn_rwlock, flags); + xen_free_irq(irq); }
@@ -1219,6 +1232,8 @@ static void __xen_evtchn_do_upcall(void) struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int cpu = smp_processor_id();
+ read_lock(&evtchn_rwlock); + do { vcpu_info->evtchn_upcall_pending = 0;
@@ -1229,6 +1244,8 @@ static void __xen_evtchn_do_upcall(void) virt_rmb(); /* Hypervisor can set upcall pending. */
} while (vcpu_info->evtchn_upcall_pending); + + read_unlock(&evtchn_rwlock); }
void xen_evtchn_do_upcall(struct pt_regs *regs)
On 17.04.2020 13:54, Juergen Gross wrote:
@@ -603,6 +611,7 @@ static void __unbind_from_irq(unsigned int irq) { evtchn_port_t evtchn = evtchn_from_irq(irq); struct irq_info *info = irq_get_handler_data(irq);
- unsigned long flags;
if (info->refcnt > 0) { info->refcnt--; @@ -610,8 +619,10 @@ static void __unbind_from_irq(unsigned int irq) return; }
- write_lock_irqsave(&evtchn_rwlock, flags);
This placement looks odd - it doesn't guard the earlier if() (i.e. isn't covering as wide a scope as one might expect) but also isn't inside ...
if (VALID_EVTCHN(evtchn)) {
... this if(), which would be the minimal locked region needed. While you have a comment at the declaration of the lock, I'd recommend having another one here clarifying that the less than expected locked region is because the lock is to guard only against removal, not also against other races (which I assume are taken care of by other means).
unsigned int cpu = cpu_from_irq(irq);
unsigned int cpu = cpu_from_irq(irq);;
Stray and unwanted change?
@@ -629,6 +640,8 @@ static void __unbind_from_irq(unsigned int irq) xen_irq_info_cleanup(info); }
- write_unlock_irqrestore(&evtchn_rwlock, flags);
- xen_free_irq(irq);
} @@ -1219,6 +1232,8 @@ static void __xen_evtchn_do_upcall(void) struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int cpu = smp_processor_id();
- read_lock(&evtchn_rwlock);
- do { vcpu_info->evtchn_upcall_pending = 0;
@@ -1229,6 +1244,8 @@ static void __xen_evtchn_do_upcall(void) virt_rmb(); /* Hypervisor can set upcall pending. */ } while (vcpu_info->evtchn_upcall_pending);
- read_unlock(&evtchn_rwlock);
}
So you guard against removal, but not against insertion - is insertion done in a way that partial setup is never a problem? (Looking at in particular the FIFO code I can't easily convince myself this is the case.)
Jan
On 17.04.20 14:26, Jan Beulich wrote:
On 17.04.2020 13:54, Juergen Gross wrote:
@@ -603,6 +611,7 @@ static void __unbind_from_irq(unsigned int irq) { evtchn_port_t evtchn = evtchn_from_irq(irq); struct irq_info *info = irq_get_handler_data(irq);
- unsigned long flags;
if (info->refcnt > 0) { info->refcnt--; @@ -610,8 +619,10 @@ static void __unbind_from_irq(unsigned int irq) return; }
- write_lock_irqsave(&evtchn_rwlock, flags);
This placement looks odd - it doesn't guard the earlier if() (i.e. isn't covering as wide a scope as one might expect) but also isn't inside ...
if (VALID_EVTCHN(evtchn)) {
... this if(), which would be the minimal locked region needed.
Right, I can move the lock inside this if ().
While you have a comment at the declaration of the lock, I'd recommend having another one here clarifying that the less than expected locked region is because the lock is to guard only against removal, not also against other races (which I assume are taken care of by other means).
Indeed, there is mutex_lock(&irq_mapping_update_lock) around all invocations of __unbind_from_irq().
unsigned int cpu = cpu_from_irq(irq);
unsigned int cpu = cpu_from_irq(irq);;
Stray and unwanted change?
Yes.
@@ -629,6 +640,8 @@ static void __unbind_from_irq(unsigned int irq) xen_irq_info_cleanup(info); }
- write_unlock_irqrestore(&evtchn_rwlock, flags);
- xen_free_irq(irq); }
@@ -1219,6 +1232,8 @@ static void __xen_evtchn_do_upcall(void) struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int cpu = smp_processor_id();
- read_lock(&evtchn_rwlock);
- do { vcpu_info->evtchn_upcall_pending = 0;
@@ -1229,6 +1244,8 @@ static void __xen_evtchn_do_upcall(void) virt_rmb(); /* Hypervisor can set upcall pending. */ } while (vcpu_info->evtchn_upcall_pending);
- read_unlock(&evtchn_rwlock); }
So you guard against removal, but not against insertion - is insertion done in a way that partial setup is never a problem? (Looking at in particular the FIFO code I can't easily convince myself this is the case.)
The IRQ related to the event channel is enabled only after the event channel is setup completely. This is done by request_irq() or similar calls (e.g. request_threaded_irq()).
I'll add a comment explaining that.
Juergen
On 17.04.20 13:54, Juergen Gross wrote:
Today it can happen that an event channel is being removed from the system while the event handling loop is active. This can lead to a race resulting in crashes or WARN() splats.
Fix this problem by using a rwlock taken as reader in the event handling loop and as writer when removing an event channel.
As the observed problem was a NULL dereference in evtchn_from_irq(), make this function more robust against races by testing that the irq_info pointer is not NULL before dereferencing it.
Cc: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Cc: stable@vger.kernel.org Reported-by: Marek Marczykowski-Górecki marmarek@invisiblethingslab.com Signed-off-by: Juergen Gross jgross@suse.com
Please ignore, script went wild.
Juergen
linux-stable-mirror@lists.linaro.org