The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
7c7f9bc986e6 ("serial: Deassert Transmit Enable on probe in driver-specific way")
60f361722ad2 ("serial: fsl_lpuart: Reset prior to registration")
44b27aec9d96 ("serial: core, 8250: set RS485 termination GPIO in serial core")
ae50bb275283 ("serial: take termios_rwsem for ->rs485_config() & pass termios as param")
84f2faa7852e ("serial: 8250: Remove serial_rs485 sanitization from em485")
7195eefb38d7 ("serial: fsl_lpuart: Call core's sanitization and remove custom one")
596a9171472b ("serial: Clear rs485 struct when non-RS485 mode is set")
be2e2cb1d281 ("serial: Sanitize rs485_struct")
59c221f8e126 ("serial: 8250_exar: Fill in rs485_supported")
2dbd0c14ebe8 ("serial: Move serial_rs485 sanitization into separate function")
8322b1f52715 ("serial: Add uart_rs485_config()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7c7f9bc986e698873b489c371a08f206979d06b7 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Thu, 22 Sep 2022 18:27:33 +0200
Subject: [PATCH] serial: Deassert Transmit Enable on probe in driver-specific
way
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
When a UART port is newly registered, uart_configure_port() seeks to
deassert RS485 Transmit Enable by setting the RTS bit in port->mctrl.
However a number of UART drivers interpret a set RTS bit as *assertion*
instead of deassertion: Affected drivers include those using
serial8250_em485_config() (except 8250_bcm2835aux.c) and some using
mctrl_gpio (e.g. imx.c).
Since the interpretation of the RTS bit is driver-specific, it is not
suitable as a means to centrally deassert Transmit Enable in the serial
core. Instead, the serial core must call on drivers to deassert it in
their driver-specific way. One way to achieve that is to call
->rs485_config(). It implicitly deasserts Transmit Enable.
So amend uart_configure_port() and uart_resume_port() to invoke
uart_rs485_config(). That allows removing calls to uart_rs485_config()
from drivers' ->probe() hooks and declaring the function static.
Skip any invocation of ->set_mctrl() if RS485 is enabled. RS485 has no
hardware flow control, so the modem control lines are irrelevant and
need not be touched. When leaving RS485 mode, reset the modem control
lines to the state stored in port->mctrl. That way, UARTs which are
muxed between RS485 and RS232 transceivers drive the lines correctly
when switched to RS232. (serial8250_do_startup() historically raises
the OUT1 modem signal because otherwise interrupts are not signaled on
ancient PC UARTs, but I believe that no longer applies to modern,
RS485-capable UARTs and is thus safe to be skipped.)
imx.c modifies port->mctrl whenever Transmit Enable is asserted and
deasserted. Stop it from doing that so port->mctrl reflects the RS232
line state.
8250_omap.c deasserts Transmit Enable on ->runtime_resume() by calling
->set_mctrl(). Because that is now a no-op in RS485 mode, amend the
function to call serial8250_em485_stop_tx().
fsl_lpuart.c retrieves and applies the RS485 device tree properties
after registering the UART port. Because applying now happens on
registration in uart_configure_port(), move retrieval of the properties
ahead of uart_add_one_port().
Link: https://lore.kernel.org/all/20220329085050.311408-1-matthias.schiffer@ew.tq…
Link: https://lore.kernel.org/all/8f538a8903795f22f9acc94a9a31b03c9c4ccacb.camel@…
Fixes: d3b3404df318 ("serial: Fix incorrect rs485 polarity on uart open")
Cc: stable(a)vger.kernel.org # v4.14+
Reported-by: Matthias Schiffer <matthias.schiffer(a)ew.tq-group.com>
Reported-by: Roosen Henri <Henri.Roosen(a)ginzinger.com>
Tested-by: Matthias Schiffer <matthias.schiffer(a)ew.tq-group.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Link: https://lore.kernel.org/r/2de36eba3fbe11278d5002e4e501afe0ceaca039.16638638…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 871f8a050513..41b8c6b27136 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -342,6 +342,9 @@ static void omap8250_restore_regs(struct uart_8250_port *up)
omap8250_update_mdr1(up, priv);
up->port.ops->set_mctrl(&up->port, up->port.mctrl);
+
+ if (up->port.rs485.flags & SER_RS485_ENABLED)
+ serial8250_em485_stop_tx(up);
}
/*
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 0052cf899e29..8e9f247590bd 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1632,7 +1632,6 @@ static int pci_fintek_init(struct pci_dev *dev)
resource_size_t bar_data[3];
u8 config_base;
struct serial_private *priv = pci_get_drvdata(dev);
- struct uart_8250_port *port;
if (!(pci_resource_flags(dev, 5) & IORESOURCE_IO) ||
!(pci_resource_flags(dev, 4) & IORESOURCE_IO) ||
@@ -1679,13 +1678,7 @@ static int pci_fintek_init(struct pci_dev *dev)
pci_write_config_byte(dev, config_base + 0x06, dev->irq);
- if (priv) {
- /* re-apply RS232/485 mode when
- * pciserial_resume_ports()
- */
- port = serial8250_get_port(priv->line[i]);
- uart_rs485_config(&port->port);
- } else {
+ if (!priv) {
/* First init without port data
* force init to RS232 Mode
*/
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 6a3c2ead0f17..dfdd2db95cb3 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -600,7 +600,7 @@ EXPORT_SYMBOL_GPL(serial8250_rpm_put);
static int serial8250_em485_init(struct uart_8250_port *p)
{
if (p->em485)
- return 0;
+ goto deassert_rts;
p->em485 = kmalloc(sizeof(struct uart_8250_em485), GFP_ATOMIC);
if (!p->em485)
@@ -616,7 +616,9 @@ static int serial8250_em485_init(struct uart_8250_port *p)
p->em485->active_timer = NULL;
p->em485->tx_stopped = true;
- p->rs485_stop_tx(p);
+deassert_rts:
+ if (p->em485->tx_stopped)
+ p->rs485_stop_tx(p);
return 0;
}
@@ -2048,6 +2050,9 @@ EXPORT_SYMBOL_GPL(serial8250_do_set_mctrl);
static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
{
+ if (port->rs485.flags & SER_RS485_ENABLED)
+ return;
+
if (port->set_mctrl)
port->set_mctrl(port, mctrl);
else
@@ -3192,9 +3197,6 @@ static void serial8250_config_port(struct uart_port *port, int flags)
if (flags & UART_CONFIG_TYPE)
autoconfig(up);
- if (port->rs485.flags & SER_RS485_ENABLED)
- uart_rs485_config(port);
-
/* if access method is AU, it is a 16550 with a quirk */
if (port->type == PORT_16550A && port->iotype == UPIO_AU)
up->bugs |= UART_BUG_NOMSR;
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index fadb49086102..67fa113f77d4 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -2726,15 +2726,13 @@ static int lpuart_probe(struct platform_device *pdev)
if (ret)
goto failed_reset;
- ret = uart_add_one_port(&lpuart_reg, &sport->port);
- if (ret)
- goto failed_attach_port;
-
ret = uart_get_rs485_mode(&sport->port);
if (ret)
goto failed_get_rs485;
- uart_rs485_config(&sport->port);
+ ret = uart_add_one_port(&lpuart_reg, &sport->port);
+ if (ret)
+ goto failed_attach_port;
ret = devm_request_irq(&pdev->dev, sport->port.irq, handler, 0,
DRIVER_NAME, sport);
@@ -2744,9 +2742,9 @@ static int lpuart_probe(struct platform_device *pdev)
return 0;
failed_irq_request:
-failed_get_rs485:
uart_remove_one_port(&lpuart_reg, &sport->port);
failed_attach_port:
+failed_get_rs485:
failed_reset:
lpuart_disable_clks(sport);
return ret;
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 5875ee66492b..05b432dc7a85 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -380,8 +380,7 @@ static void imx_uart_rts_active(struct imx_port *sport, u32 *ucr2)
{
*ucr2 &= ~(UCR2_CTSC | UCR2_CTS);
- sport->port.mctrl |= TIOCM_RTS;
- mctrl_gpio_set(sport->gpios, sport->port.mctrl);
+ mctrl_gpio_set(sport->gpios, sport->port.mctrl | TIOCM_RTS);
}
/* called with port.lock taken and irqs caller dependent */
@@ -390,8 +389,7 @@ static void imx_uart_rts_inactive(struct imx_port *sport, u32 *ucr2)
*ucr2 &= ~UCR2_CTSC;
*ucr2 |= UCR2_CTS;
- sport->port.mctrl &= ~TIOCM_RTS;
- mctrl_gpio_set(sport->gpios, sport->port.mctrl);
+ mctrl_gpio_set(sport->gpios, sport->port.mctrl & ~TIOCM_RTS);
}
static void start_hrtimer_ms(struct hrtimer *hrt, unsigned long msec)
@@ -2347,8 +2345,6 @@ static int imx_uart_probe(struct platform_device *pdev)
dev_err(&pdev->dev,
"low-active RTS not possible when receiver is off, enabling receiver\n");
- uart_rs485_config(&sport->port);
-
/* Disable interrupts before requesting them */
ucr1 = imx_uart_readl(sport, UCR1);
ucr1 &= ~(UCR1_ADEN | UCR1_TRDYEN | UCR1_IDEN | UCR1_RRDYEN | UCR1_RTSDEN);
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index c7113182275a..179ee199df34 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -158,15 +158,10 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear)
unsigned long flags;
unsigned int old;
- if (port->rs485.flags & SER_RS485_ENABLED) {
- set &= ~TIOCM_RTS;
- clear &= ~TIOCM_RTS;
- }
-
spin_lock_irqsave(&port->lock, flags);
old = port->mctrl;
port->mctrl = (old & ~clear) | set;
- if (old != port->mctrl)
+ if (old != port->mctrl && !(port->rs485.flags & SER_RS485_ENABLED))
port->ops->set_mctrl(port, port->mctrl);
spin_unlock_irqrestore(&port->lock, flags);
}
@@ -1391,7 +1386,7 @@ static void uart_set_rs485_termination(struct uart_port *port,
!!(rs485->flags & SER_RS485_TERMINATE_BUS));
}
-int uart_rs485_config(struct uart_port *port)
+static int uart_rs485_config(struct uart_port *port)
{
struct serial_rs485 *rs485 = &port->rs485;
int ret;
@@ -1405,7 +1400,6 @@ int uart_rs485_config(struct uart_port *port)
return ret;
}
-EXPORT_SYMBOL_GPL(uart_rs485_config);
static int uart_get_rs485_config(struct uart_port *port,
struct serial_rs485 __user *rs485)
@@ -1444,8 +1438,13 @@ static int uart_set_rs485_config(struct tty_struct *tty, struct uart_port *port,
spin_lock_irqsave(&port->lock, flags);
ret = port->rs485_config(port, &tty->termios, &rs485);
- if (!ret)
+ if (!ret) {
port->rs485 = rs485;
+
+ /* Reset RTS and other mctrl lines when disabling RS485 */
+ if (!(rs485.flags & SER_RS485_ENABLED))
+ port->ops->set_mctrl(port, port->mctrl);
+ }
spin_unlock_irqrestore(&port->lock, flags);
if (ret)
return ret;
@@ -2352,7 +2351,8 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *uport)
spin_lock_irq(&uport->lock);
ops->stop_tx(uport);
- ops->set_mctrl(uport, 0);
+ if (!(uport->rs485.flags & SER_RS485_ENABLED))
+ ops->set_mctrl(uport, 0);
/* save mctrl so it can be restored on resume */
mctrl = uport->mctrl;
uport->mctrl = 0;
@@ -2440,7 +2440,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
uart_change_pm(state, UART_PM_STATE_ON);
spin_lock_irq(&uport->lock);
- ops->set_mctrl(uport, 0);
+ if (!(uport->rs485.flags & SER_RS485_ENABLED))
+ ops->set_mctrl(uport, 0);
spin_unlock_irq(&uport->lock);
if (console_suspend_enabled || !uart_console(uport)) {
/* Protected by port mutex for now */
@@ -2451,7 +2452,10 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
if (tty)
uart_change_speed(tty, state, NULL);
spin_lock_irq(&uport->lock);
- ops->set_mctrl(uport, uport->mctrl);
+ if (!(uport->rs485.flags & SER_RS485_ENABLED))
+ ops->set_mctrl(uport, uport->mctrl);
+ else
+ uart_rs485_config(uport);
ops->start_tx(uport);
spin_unlock_irq(&uport->lock);
tty_port_set_initialized(port, 1);
@@ -2558,10 +2562,10 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
*/
spin_lock_irqsave(&port->lock, flags);
port->mctrl &= TIOCM_DTR;
- if (port->rs485.flags & SER_RS485_ENABLED &&
- !(port->rs485.flags & SER_RS485_RTS_AFTER_SEND))
- port->mctrl |= TIOCM_RTS;
- port->ops->set_mctrl(port, port->mctrl);
+ if (!(port->rs485.flags & SER_RS485_ENABLED))
+ port->ops->set_mctrl(port, port->mctrl);
+ else
+ uart_rs485_config(port);
spin_unlock_irqrestore(&port->lock, flags);
/*
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index b4c21f84ad79..d657f2a42a7b 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -951,5 +951,4 @@ static inline int uart_handle_break(struct uart_port *port)
!((cflag) & CLOCAL))
int uart_get_rs485_mode(struct uart_port *port);
-int uart_rs485_config(struct uart_port *port);
#endif /* LINUX_SERIAL_CORE_H */
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
5c13a4a0291b ("xen/gntdev: Accommodate VMA splitting")
dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
ce2f46f3531a ("xen/gntdev: fix unmap notification order")
f28347cc6639 ("Xen/gntdev: don't ignore kernel unmapping error")
30dcc56bba91 ("xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests")
970655aa9b42 ("xen/gntdev: fix gntdev_mmap() error exit path")
bce21a2b48ed ("Xen/gnttab: introduce common INVALID_GRANT_{HANDLE,REF}")
36caa3fedf06 ("Xen/gntdev: don't needlessly allocate k{,un}map_ops[]")
8310b77b48c5 ("Xen/gnttab: handle p2m update errors on a per-slot basis")
36bf1dfb8b26 ("xen/arm: don't ignore return errors from set_phys_to_machine")
ebee0eab0859 ("Xen/gntdev: correct error checking in gntdev_map_grant_pages()")
dbe5283605b3 ("Xen/gntdev: correct dev_bus_addr handling in gntdev_map_grant_pages()")
0102e4efda76 ("xen: Use evtchn_type_t as a type for event channels")
9293724192a7 ("xen/gntdev: Do not use mm notifiers with autotranslating guests")
b3f7931f5c61 ("xen/gntdev: switch from kcalloc() to kvcalloc()")
3b06ac6707c1 ("xen/gntdev: replace global limit of mapped pages by limit per call")
d3eeb1d77c5d ("xen/gntdev: use mmu_interval_notifier_insert")
ee7f5225dc3c ("xen: Stop abusing DT of_dma_configure API")
bce5963bcb4f ("xen/events: fix binding user event channels to cpus")
8b1e0f81fb6f ("mm/pgtable: drop pgtable_t variable from pte_fn_t functions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b(a)runbox.com>
Date: Sun, 2 Oct 2022 18:20:06 -0400
Subject: [PATCH] xen/gntdev: Accommodate VMA splitting
Prior to this commit, the gntdev driver code did not handle the
following scenario correctly with paravirtualized (PV) Xen domains:
* User process sets up a gntdev mapping composed of two grant mappings
(i.e., two pages shared by another Xen domain).
* User process munmap()s one of the pages.
* User process munmap()s the remaining page.
* User process exits.
In the scenario above, the user process would cause the kernel to log
the following messages in dmesg for the first munmap(), and the second
munmap() call would result in similar log messages:
BUG: Bad page map in process doublemap.test pte:... pmd:...
page:0000000057c97bff refcount:1 mapcount:-1 \
mapping:0000000000000000 index:0x0 pfn:...
...
page dumped because: bad pte
...
file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0
...
Call Trace:
<TASK>
dump_stack_lvl+0x46/0x5e
print_bad_pte.cold+0x66/0xb6
unmap_page_range+0x7e5/0xdc0
unmap_vmas+0x78/0xf0
unmap_region+0xa8/0x110
__do_munmap+0x1ea/0x4e0
__vm_munmap+0x75/0x120
__x64_sys_munmap+0x28/0x40
do_syscall_64+0x38/0x90
entry_SYSCALL_64_after_hwframe+0x61/0xcb
...
For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG)
would print out the following and trigger a general protection fault in
the affected Xen PV domain:
(XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
(XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
As of this writing, gntdev_grant_map structure's vma field (referred to
as map->vma below) is mainly used for checking the start and end
addresses of mappings. However, with split VMAs, these may change, and
there could be more than one VMA associated with a gntdev mapping.
Hence, remove the use of map->vma and rely on map->pages_vm_start for
the original start address and on (map->count << PAGE_SHIFT) for the
original mapping size. Let the invalidate() and find_special_page()
hooks use these.
Also, given that there can be multiple VMAs associated with a gntdev
mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to
the end of gntdev_put_map, so that the MMU notifier is only removed
after the closing of the last remaining VMA.
Finally, use an atomic to prevent inadvertent gntdev mapping re-use,
instead of using the map->live_grants atomic counter and/or the map->vma
pointer (the latter of which is now removed). This prevents the
userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the
same address range as a previously set up gntdev mapping. This scenario
can be summarized with the following call-trace, which was valid prior
to this commit:
mmap
gntdev_mmap
mmap (repeat mmap with MAP_FIXED over the same address range)
gntdev_invalidate
unmap_grant_pages (sets 'being_removed' entries to true)
gnttab_unmap_refs_async
unmap_single_vma
gntdev_mmap (maps the shared pages again)
munmap
gntdev_invalidate
unmap_grant_pages
(no-op because 'being_removed' entries are true)
unmap_single_vma (For PV domains, Xen reports that a granted page
is being unmapped and triggers a general protection fault in the
affected domain, if Xen was built with CONFIG_DEBUG)
The fix for this last scenario could be worth its own commit, but we
opted for a single commit, because removing the gntdev_grant_map
structure's vma field requires guarding the entry to gntdev_mmap(), and
the live_grants atomic counter is not sufficient on its own to prevent
the mmap() over a pre-existing mapping.
Link: https://github.com/QubesOS/qubes-issues/issues/7631
Fixes: ab31523c2fca ("xen/gntdev: allow usermode to map granted pages")
Cc: stable(a)vger.kernel.org
Signed-off-by: M. Vefa Bicakci <m.v.b(a)runbox.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Link: https://lore.kernel.org/r/20221002222006.2077-3-m.v.b@runbox.com
Signed-off-by: Juergen Gross <jgross(a)suse.com>
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 40ef379c28ab..9c286b2a1900 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
};
struct gntdev_grant_map {
+ atomic_t in_use;
struct mmu_interval_notifier notifier;
+ bool notifier_init;
struct list_head next;
- struct vm_area_struct *vma;
int index;
int count;
int flags;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eb0586b9767d..4d9a3050de6a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
*/
}
+ if (use_ptemod && map->notifier_init)
+ mmu_interval_notifier_remove(&map->notifier);
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
struct gntdev_grant_map *map = data;
- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
(1 << _GNTMAP_guest_avail0);
u64 pte_maddr;
@@ -508,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
struct gntdev_priv *priv = file->private_data;
pr_debug("gntdev_vma_close %p\n", vma);
- if (use_ptemod) {
- WARN_ON(map->vma != vma);
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
+
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
}
@@ -540,29 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
+ unsigned long map_start, map_end;
if (!mmu_notifier_range_blockable(range))
return false;
+ map_start = map->pages_vm_start;
+ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
+
/*
* If the VMA is split or otherwise changed the notifier is not
* updated, but we don't want to process VA's outside the modified
* VMA. FIXME: It would be much more understandable to just prevent
* modifying the VMA in the first place.
*/
- if (map->vma->vm_start >= range->end ||
- map->vma->vm_end <= range->start)
+ if (map_start >= range->end || map_end <= range->start)
return true;
- mstart = max(range->start, map->vma->vm_start);
- mend = min(range->end, map->vma->vm_end);
+ mstart = max(range->start, map_start);
+ mend = min(range->end, map_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end,
- range->start, range->end, mstart, mend);
- unmap_grant_pages(map,
- (mstart - map->vma->vm_start) >> PAGE_SHIFT,
- (mend - mstart) >> PAGE_SHIFT);
+ map->index, map->count, map_start, map_end,
+ range->start, range->end, mstart, mend);
+ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
+ (mend - mstart) >> PAGE_SHIFT);
return true;
}
@@ -1042,18 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
return -EINVAL;
pr_debug("map %d+%d at %lx (pgoff %lx)\n",
- index, count, vma->vm_start, vma->vm_pgoff);
+ index, count, vma->vm_start, vma->vm_pgoff);
mutex_lock(&priv->lock);
map = gntdev_find_map_index(priv, index, count);
if (!map)
goto unlock_out;
- if (use_ptemod && map->vma)
- goto unlock_out;
- if (atomic_read(&map->live_grants)) {
- err = -EAGAIN;
+ if (!atomic_add_unless(&map->in_use, 1, 1))
goto unlock_out;
- }
+
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
@@ -1074,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
map->flags |= GNTMAP_readonly;
}
+ map->pages_vm_start = vma->vm_start;
+
if (use_ptemod) {
- map->vma = vma;
err = mmu_interval_notifier_insert_locked(
&map->notifier, vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
- if (err) {
- map->vma = NULL;
+ if (err)
goto out_unlock_put;
- }
+
+ map->notifier_init = true;
}
mutex_unlock(&priv->lock);
@@ -1099,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
*/
mmu_interval_read_begin(&map->notifier);
- map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start,
find_grant_ptes, map);
@@ -1128,13 +1125,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
out_unlock_put:
mutex_unlock(&priv->lock);
out_put_map:
- if (use_ptemod) {
+ if (use_ptemod)
unmap_grant_pages(map, 0, map->count);
- if (map->vma) {
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
- }
gntdev_put_map(priv, map);
return err;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
5c13a4a0291b ("xen/gntdev: Accommodate VMA splitting")
dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
ce2f46f3531a ("xen/gntdev: fix unmap notification order")
f28347cc6639 ("Xen/gntdev: don't ignore kernel unmapping error")
30dcc56bba91 ("xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests")
970655aa9b42 ("xen/gntdev: fix gntdev_mmap() error exit path")
bce21a2b48ed ("Xen/gnttab: introduce common INVALID_GRANT_{HANDLE,REF}")
36caa3fedf06 ("Xen/gntdev: don't needlessly allocate k{,un}map_ops[]")
8310b77b48c5 ("Xen/gnttab: handle p2m update errors on a per-slot basis")
36bf1dfb8b26 ("xen/arm: don't ignore return errors from set_phys_to_machine")
ebee0eab0859 ("Xen/gntdev: correct error checking in gntdev_map_grant_pages()")
dbe5283605b3 ("Xen/gntdev: correct dev_bus_addr handling in gntdev_map_grant_pages()")
0102e4efda76 ("xen: Use evtchn_type_t as a type for event channels")
9293724192a7 ("xen/gntdev: Do not use mm notifiers with autotranslating guests")
b3f7931f5c61 ("xen/gntdev: switch from kcalloc() to kvcalloc()")
3b06ac6707c1 ("xen/gntdev: replace global limit of mapped pages by limit per call")
d3eeb1d77c5d ("xen/gntdev: use mmu_interval_notifier_insert")
ee7f5225dc3c ("xen: Stop abusing DT of_dma_configure API")
bce5963bcb4f ("xen/events: fix binding user event channels to cpus")
8b1e0f81fb6f ("mm/pgtable: drop pgtable_t variable from pte_fn_t functions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b(a)runbox.com>
Date: Sun, 2 Oct 2022 18:20:06 -0400
Subject: [PATCH] xen/gntdev: Accommodate VMA splitting
Prior to this commit, the gntdev driver code did not handle the
following scenario correctly with paravirtualized (PV) Xen domains:
* User process sets up a gntdev mapping composed of two grant mappings
(i.e., two pages shared by another Xen domain).
* User process munmap()s one of the pages.
* User process munmap()s the remaining page.
* User process exits.
In the scenario above, the user process would cause the kernel to log
the following messages in dmesg for the first munmap(), and the second
munmap() call would result in similar log messages:
BUG: Bad page map in process doublemap.test pte:... pmd:...
page:0000000057c97bff refcount:1 mapcount:-1 \
mapping:0000000000000000 index:0x0 pfn:...
...
page dumped because: bad pte
...
file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0
...
Call Trace:
<TASK>
dump_stack_lvl+0x46/0x5e
print_bad_pte.cold+0x66/0xb6
unmap_page_range+0x7e5/0xdc0
unmap_vmas+0x78/0xf0
unmap_region+0xa8/0x110
__do_munmap+0x1ea/0x4e0
__vm_munmap+0x75/0x120
__x64_sys_munmap+0x28/0x40
do_syscall_64+0x38/0x90
entry_SYSCALL_64_after_hwframe+0x61/0xcb
...
For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG)
would print out the following and trigger a general protection fault in
the affected Xen PV domain:
(XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
(XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
As of this writing, gntdev_grant_map structure's vma field (referred to
as map->vma below) is mainly used for checking the start and end
addresses of mappings. However, with split VMAs, these may change, and
there could be more than one VMA associated with a gntdev mapping.
Hence, remove the use of map->vma and rely on map->pages_vm_start for
the original start address and on (map->count << PAGE_SHIFT) for the
original mapping size. Let the invalidate() and find_special_page()
hooks use these.
Also, given that there can be multiple VMAs associated with a gntdev
mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to
the end of gntdev_put_map, so that the MMU notifier is only removed
after the closing of the last remaining VMA.
Finally, use an atomic to prevent inadvertent gntdev mapping re-use,
instead of using the map->live_grants atomic counter and/or the map->vma
pointer (the latter of which is now removed). This prevents the
userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the
same address range as a previously set up gntdev mapping. This scenario
can be summarized with the following call-trace, which was valid prior
to this commit:
mmap
gntdev_mmap
mmap (repeat mmap with MAP_FIXED over the same address range)
gntdev_invalidate
unmap_grant_pages (sets 'being_removed' entries to true)
gnttab_unmap_refs_async
unmap_single_vma
gntdev_mmap (maps the shared pages again)
munmap
gntdev_invalidate
unmap_grant_pages
(no-op because 'being_removed' entries are true)
unmap_single_vma (For PV domains, Xen reports that a granted page
is being unmapped and triggers a general protection fault in the
affected domain, if Xen was built with CONFIG_DEBUG)
The fix for this last scenario could be worth its own commit, but we
opted for a single commit, because removing the gntdev_grant_map
structure's vma field requires guarding the entry to gntdev_mmap(), and
the live_grants atomic counter is not sufficient on its own to prevent
the mmap() over a pre-existing mapping.
Link: https://github.com/QubesOS/qubes-issues/issues/7631
Fixes: ab31523c2fca ("xen/gntdev: allow usermode to map granted pages")
Cc: stable(a)vger.kernel.org
Signed-off-by: M. Vefa Bicakci <m.v.b(a)runbox.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Link: https://lore.kernel.org/r/20221002222006.2077-3-m.v.b@runbox.com
Signed-off-by: Juergen Gross <jgross(a)suse.com>
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 40ef379c28ab..9c286b2a1900 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
};
struct gntdev_grant_map {
+ atomic_t in_use;
struct mmu_interval_notifier notifier;
+ bool notifier_init;
struct list_head next;
- struct vm_area_struct *vma;
int index;
int count;
int flags;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eb0586b9767d..4d9a3050de6a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
*/
}
+ if (use_ptemod && map->notifier_init)
+ mmu_interval_notifier_remove(&map->notifier);
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
struct gntdev_grant_map *map = data;
- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
(1 << _GNTMAP_guest_avail0);
u64 pte_maddr;
@@ -508,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
struct gntdev_priv *priv = file->private_data;
pr_debug("gntdev_vma_close %p\n", vma);
- if (use_ptemod) {
- WARN_ON(map->vma != vma);
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
+
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
}
@@ -540,29 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
+ unsigned long map_start, map_end;
if (!mmu_notifier_range_blockable(range))
return false;
+ map_start = map->pages_vm_start;
+ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
+
/*
* If the VMA is split or otherwise changed the notifier is not
* updated, but we don't want to process VA's outside the modified
* VMA. FIXME: It would be much more understandable to just prevent
* modifying the VMA in the first place.
*/
- if (map->vma->vm_start >= range->end ||
- map->vma->vm_end <= range->start)
+ if (map_start >= range->end || map_end <= range->start)
return true;
- mstart = max(range->start, map->vma->vm_start);
- mend = min(range->end, map->vma->vm_end);
+ mstart = max(range->start, map_start);
+ mend = min(range->end, map_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end,
- range->start, range->end, mstart, mend);
- unmap_grant_pages(map,
- (mstart - map->vma->vm_start) >> PAGE_SHIFT,
- (mend - mstart) >> PAGE_SHIFT);
+ map->index, map->count, map_start, map_end,
+ range->start, range->end, mstart, mend);
+ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
+ (mend - mstart) >> PAGE_SHIFT);
return true;
}
@@ -1042,18 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
return -EINVAL;
pr_debug("map %d+%d at %lx (pgoff %lx)\n",
- index, count, vma->vm_start, vma->vm_pgoff);
+ index, count, vma->vm_start, vma->vm_pgoff);
mutex_lock(&priv->lock);
map = gntdev_find_map_index(priv, index, count);
if (!map)
goto unlock_out;
- if (use_ptemod && map->vma)
- goto unlock_out;
- if (atomic_read(&map->live_grants)) {
- err = -EAGAIN;
+ if (!atomic_add_unless(&map->in_use, 1, 1))
goto unlock_out;
- }
+
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
@@ -1074,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
map->flags |= GNTMAP_readonly;
}
+ map->pages_vm_start = vma->vm_start;
+
if (use_ptemod) {
- map->vma = vma;
err = mmu_interval_notifier_insert_locked(
&map->notifier, vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
- if (err) {
- map->vma = NULL;
+ if (err)
goto out_unlock_put;
- }
+
+ map->notifier_init = true;
}
mutex_unlock(&priv->lock);
@@ -1099,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
*/
mmu_interval_read_begin(&map->notifier);
- map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start,
find_grant_ptes, map);
@@ -1128,13 +1125,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
out_unlock_put:
mutex_unlock(&priv->lock);
out_put_map:
- if (use_ptemod) {
+ if (use_ptemod)
unmap_grant_pages(map, 0, map->count);
- if (map->vma) {
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
- }
gntdev_put_map(priv, map);
return err;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
5c13a4a0291b ("xen/gntdev: Accommodate VMA splitting")
dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
ce2f46f3531a ("xen/gntdev: fix unmap notification order")
f28347cc6639 ("Xen/gntdev: don't ignore kernel unmapping error")
30dcc56bba91 ("xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests")
970655aa9b42 ("xen/gntdev: fix gntdev_mmap() error exit path")
bce21a2b48ed ("Xen/gnttab: introduce common INVALID_GRANT_{HANDLE,REF}")
36caa3fedf06 ("Xen/gntdev: don't needlessly allocate k{,un}map_ops[]")
8310b77b48c5 ("Xen/gnttab: handle p2m update errors on a per-slot basis")
36bf1dfb8b26 ("xen/arm: don't ignore return errors from set_phys_to_machine")
ebee0eab0859 ("Xen/gntdev: correct error checking in gntdev_map_grant_pages()")
dbe5283605b3 ("Xen/gntdev: correct dev_bus_addr handling in gntdev_map_grant_pages()")
0102e4efda76 ("xen: Use evtchn_type_t as a type for event channels")
9293724192a7 ("xen/gntdev: Do not use mm notifiers with autotranslating guests")
b3f7931f5c61 ("xen/gntdev: switch from kcalloc() to kvcalloc()")
3b06ac6707c1 ("xen/gntdev: replace global limit of mapped pages by limit per call")
d3eeb1d77c5d ("xen/gntdev: use mmu_interval_notifier_insert")
ee7f5225dc3c ("xen: Stop abusing DT of_dma_configure API")
bce5963bcb4f ("xen/events: fix binding user event channels to cpus")
8b1e0f81fb6f ("mm/pgtable: drop pgtable_t variable from pte_fn_t functions")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b(a)runbox.com>
Date: Sun, 2 Oct 2022 18:20:06 -0400
Subject: [PATCH] xen/gntdev: Accommodate VMA splitting
Prior to this commit, the gntdev driver code did not handle the
following scenario correctly with paravirtualized (PV) Xen domains:
* User process sets up a gntdev mapping composed of two grant mappings
(i.e., two pages shared by another Xen domain).
* User process munmap()s one of the pages.
* User process munmap()s the remaining page.
* User process exits.
In the scenario above, the user process would cause the kernel to log
the following messages in dmesg for the first munmap(), and the second
munmap() call would result in similar log messages:
BUG: Bad page map in process doublemap.test pte:... pmd:...
page:0000000057c97bff refcount:1 mapcount:-1 \
mapping:0000000000000000 index:0x0 pfn:...
...
page dumped because: bad pte
...
file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0
...
Call Trace:
<TASK>
dump_stack_lvl+0x46/0x5e
print_bad_pte.cold+0x66/0xb6
unmap_page_range+0x7e5/0xdc0
unmap_vmas+0x78/0xf0
unmap_region+0xa8/0x110
__do_munmap+0x1ea/0x4e0
__vm_munmap+0x75/0x120
__x64_sys_munmap+0x28/0x40
do_syscall_64+0x38/0x90
entry_SYSCALL_64_after_hwframe+0x61/0xcb
...
For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG)
would print out the following and trigger a general protection fault in
the affected Xen PV domain:
(XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
(XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
As of this writing, gntdev_grant_map structure's vma field (referred to
as map->vma below) is mainly used for checking the start and end
addresses of mappings. However, with split VMAs, these may change, and
there could be more than one VMA associated with a gntdev mapping.
Hence, remove the use of map->vma and rely on map->pages_vm_start for
the original start address and on (map->count << PAGE_SHIFT) for the
original mapping size. Let the invalidate() and find_special_page()
hooks use these.
Also, given that there can be multiple VMAs associated with a gntdev
mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to
the end of gntdev_put_map, so that the MMU notifier is only removed
after the closing of the last remaining VMA.
Finally, use an atomic to prevent inadvertent gntdev mapping re-use,
instead of using the map->live_grants atomic counter and/or the map->vma
pointer (the latter of which is now removed). This prevents the
userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the
same address range as a previously set up gntdev mapping. This scenario
can be summarized with the following call-trace, which was valid prior
to this commit:
mmap
gntdev_mmap
mmap (repeat mmap with MAP_FIXED over the same address range)
gntdev_invalidate
unmap_grant_pages (sets 'being_removed' entries to true)
gnttab_unmap_refs_async
unmap_single_vma
gntdev_mmap (maps the shared pages again)
munmap
gntdev_invalidate
unmap_grant_pages
(no-op because 'being_removed' entries are true)
unmap_single_vma (For PV domains, Xen reports that a granted page
is being unmapped and triggers a general protection fault in the
affected domain, if Xen was built with CONFIG_DEBUG)
The fix for this last scenario could be worth its own commit, but we
opted for a single commit, because removing the gntdev_grant_map
structure's vma field requires guarding the entry to gntdev_mmap(), and
the live_grants atomic counter is not sufficient on its own to prevent
the mmap() over a pre-existing mapping.
Link: https://github.com/QubesOS/qubes-issues/issues/7631
Fixes: ab31523c2fca ("xen/gntdev: allow usermode to map granted pages")
Cc: stable(a)vger.kernel.org
Signed-off-by: M. Vefa Bicakci <m.v.b(a)runbox.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Link: https://lore.kernel.org/r/20221002222006.2077-3-m.v.b@runbox.com
Signed-off-by: Juergen Gross <jgross(a)suse.com>
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 40ef379c28ab..9c286b2a1900 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
};
struct gntdev_grant_map {
+ atomic_t in_use;
struct mmu_interval_notifier notifier;
+ bool notifier_init;
struct list_head next;
- struct vm_area_struct *vma;
int index;
int count;
int flags;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eb0586b9767d..4d9a3050de6a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
*/
}
+ if (use_ptemod && map->notifier_init)
+ mmu_interval_notifier_remove(&map->notifier);
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
struct gntdev_grant_map *map = data;
- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
(1 << _GNTMAP_guest_avail0);
u64 pte_maddr;
@@ -508,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
struct gntdev_priv *priv = file->private_data;
pr_debug("gntdev_vma_close %p\n", vma);
- if (use_ptemod) {
- WARN_ON(map->vma != vma);
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
+
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
}
@@ -540,29 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
+ unsigned long map_start, map_end;
if (!mmu_notifier_range_blockable(range))
return false;
+ map_start = map->pages_vm_start;
+ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
+
/*
* If the VMA is split or otherwise changed the notifier is not
* updated, but we don't want to process VA's outside the modified
* VMA. FIXME: It would be much more understandable to just prevent
* modifying the VMA in the first place.
*/
- if (map->vma->vm_start >= range->end ||
- map->vma->vm_end <= range->start)
+ if (map_start >= range->end || map_end <= range->start)
return true;
- mstart = max(range->start, map->vma->vm_start);
- mend = min(range->end, map->vma->vm_end);
+ mstart = max(range->start, map_start);
+ mend = min(range->end, map_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end,
- range->start, range->end, mstart, mend);
- unmap_grant_pages(map,
- (mstart - map->vma->vm_start) >> PAGE_SHIFT,
- (mend - mstart) >> PAGE_SHIFT);
+ map->index, map->count, map_start, map_end,
+ range->start, range->end, mstart, mend);
+ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
+ (mend - mstart) >> PAGE_SHIFT);
return true;
}
@@ -1042,18 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
return -EINVAL;
pr_debug("map %d+%d at %lx (pgoff %lx)\n",
- index, count, vma->vm_start, vma->vm_pgoff);
+ index, count, vma->vm_start, vma->vm_pgoff);
mutex_lock(&priv->lock);
map = gntdev_find_map_index(priv, index, count);
if (!map)
goto unlock_out;
- if (use_ptemod && map->vma)
- goto unlock_out;
- if (atomic_read(&map->live_grants)) {
- err = -EAGAIN;
+ if (!atomic_add_unless(&map->in_use, 1, 1))
goto unlock_out;
- }
+
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
@@ -1074,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
map->flags |= GNTMAP_readonly;
}
+ map->pages_vm_start = vma->vm_start;
+
if (use_ptemod) {
- map->vma = vma;
err = mmu_interval_notifier_insert_locked(
&map->notifier, vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
- if (err) {
- map->vma = NULL;
+ if (err)
goto out_unlock_put;
- }
+
+ map->notifier_init = true;
}
mutex_unlock(&priv->lock);
@@ -1099,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
*/
mmu_interval_read_begin(&map->notifier);
- map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start,
find_grant_ptes, map);
@@ -1128,13 +1125,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
out_unlock_put:
mutex_unlock(&priv->lock);
out_put_map:
- if (use_ptemod) {
+ if (use_ptemod)
unmap_grant_pages(map, 0, map->count);
- if (map->vma) {
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
- }
gntdev_put_map(priv, map);
return err;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
5c13a4a0291b ("xen/gntdev: Accommodate VMA splitting")
dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
ce2f46f3531a ("xen/gntdev: fix unmap notification order")
f28347cc6639 ("Xen/gntdev: don't ignore kernel unmapping error")
30dcc56bba91 ("xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests")
970655aa9b42 ("xen/gntdev: fix gntdev_mmap() error exit path")
bce21a2b48ed ("Xen/gnttab: introduce common INVALID_GRANT_{HANDLE,REF}")
36caa3fedf06 ("Xen/gntdev: don't needlessly allocate k{,un}map_ops[]")
8310b77b48c5 ("Xen/gnttab: handle p2m update errors on a per-slot basis")
36bf1dfb8b26 ("xen/arm: don't ignore return errors from set_phys_to_machine")
ebee0eab0859 ("Xen/gntdev: correct error checking in gntdev_map_grant_pages()")
dbe5283605b3 ("Xen/gntdev: correct dev_bus_addr handling in gntdev_map_grant_pages()")
0102e4efda76 ("xen: Use evtchn_type_t as a type for event channels")
9293724192a7 ("xen/gntdev: Do not use mm notifiers with autotranslating guests")
b3f7931f5c61 ("xen/gntdev: switch from kcalloc() to kvcalloc()")
3b06ac6707c1 ("xen/gntdev: replace global limit of mapped pages by limit per call")
d3eeb1d77c5d ("xen/gntdev: use mmu_interval_notifier_insert")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b(a)runbox.com>
Date: Sun, 2 Oct 2022 18:20:06 -0400
Subject: [PATCH] xen/gntdev: Accommodate VMA splitting
Prior to this commit, the gntdev driver code did not handle the
following scenario correctly with paravirtualized (PV) Xen domains:
* User process sets up a gntdev mapping composed of two grant mappings
(i.e., two pages shared by another Xen domain).
* User process munmap()s one of the pages.
* User process munmap()s the remaining page.
* User process exits.
In the scenario above, the user process would cause the kernel to log
the following messages in dmesg for the first munmap(), and the second
munmap() call would result in similar log messages:
BUG: Bad page map in process doublemap.test pte:... pmd:...
page:0000000057c97bff refcount:1 mapcount:-1 \
mapping:0000000000000000 index:0x0 pfn:...
...
page dumped because: bad pte
...
file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0
...
Call Trace:
<TASK>
dump_stack_lvl+0x46/0x5e
print_bad_pte.cold+0x66/0xb6
unmap_page_range+0x7e5/0xdc0
unmap_vmas+0x78/0xf0
unmap_region+0xa8/0x110
__do_munmap+0x1ea/0x4e0
__vm_munmap+0x75/0x120
__x64_sys_munmap+0x28/0x40
do_syscall_64+0x38/0x90
entry_SYSCALL_64_after_hwframe+0x61/0xcb
...
For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG)
would print out the following and trigger a general protection fault in
the affected Xen PV domain:
(XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
(XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
As of this writing, gntdev_grant_map structure's vma field (referred to
as map->vma below) is mainly used for checking the start and end
addresses of mappings. However, with split VMAs, these may change, and
there could be more than one VMA associated with a gntdev mapping.
Hence, remove the use of map->vma and rely on map->pages_vm_start for
the original start address and on (map->count << PAGE_SHIFT) for the
original mapping size. Let the invalidate() and find_special_page()
hooks use these.
Also, given that there can be multiple VMAs associated with a gntdev
mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to
the end of gntdev_put_map, so that the MMU notifier is only removed
after the closing of the last remaining VMA.
Finally, use an atomic to prevent inadvertent gntdev mapping re-use,
instead of using the map->live_grants atomic counter and/or the map->vma
pointer (the latter of which is now removed). This prevents the
userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the
same address range as a previously set up gntdev mapping. This scenario
can be summarized with the following call-trace, which was valid prior
to this commit:
mmap
gntdev_mmap
mmap (repeat mmap with MAP_FIXED over the same address range)
gntdev_invalidate
unmap_grant_pages (sets 'being_removed' entries to true)
gnttab_unmap_refs_async
unmap_single_vma
gntdev_mmap (maps the shared pages again)
munmap
gntdev_invalidate
unmap_grant_pages
(no-op because 'being_removed' entries are true)
unmap_single_vma (For PV domains, Xen reports that a granted page
is being unmapped and triggers a general protection fault in the
affected domain, if Xen was built with CONFIG_DEBUG)
The fix for this last scenario could be worth its own commit, but we
opted for a single commit, because removing the gntdev_grant_map
structure's vma field requires guarding the entry to gntdev_mmap(), and
the live_grants atomic counter is not sufficient on its own to prevent
the mmap() over a pre-existing mapping.
Link: https://github.com/QubesOS/qubes-issues/issues/7631
Fixes: ab31523c2fca ("xen/gntdev: allow usermode to map granted pages")
Cc: stable(a)vger.kernel.org
Signed-off-by: M. Vefa Bicakci <m.v.b(a)runbox.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Link: https://lore.kernel.org/r/20221002222006.2077-3-m.v.b@runbox.com
Signed-off-by: Juergen Gross <jgross(a)suse.com>
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 40ef379c28ab..9c286b2a1900 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
};
struct gntdev_grant_map {
+ atomic_t in_use;
struct mmu_interval_notifier notifier;
+ bool notifier_init;
struct list_head next;
- struct vm_area_struct *vma;
int index;
int count;
int flags;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eb0586b9767d..4d9a3050de6a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
*/
}
+ if (use_ptemod && map->notifier_init)
+ mmu_interval_notifier_remove(&map->notifier);
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
struct gntdev_grant_map *map = data;
- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
(1 << _GNTMAP_guest_avail0);
u64 pte_maddr;
@@ -508,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
struct gntdev_priv *priv = file->private_data;
pr_debug("gntdev_vma_close %p\n", vma);
- if (use_ptemod) {
- WARN_ON(map->vma != vma);
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
+
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
}
@@ -540,29 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
+ unsigned long map_start, map_end;
if (!mmu_notifier_range_blockable(range))
return false;
+ map_start = map->pages_vm_start;
+ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
+
/*
* If the VMA is split or otherwise changed the notifier is not
* updated, but we don't want to process VA's outside the modified
* VMA. FIXME: It would be much more understandable to just prevent
* modifying the VMA in the first place.
*/
- if (map->vma->vm_start >= range->end ||
- map->vma->vm_end <= range->start)
+ if (map_start >= range->end || map_end <= range->start)
return true;
- mstart = max(range->start, map->vma->vm_start);
- mend = min(range->end, map->vma->vm_end);
+ mstart = max(range->start, map_start);
+ mend = min(range->end, map_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end,
- range->start, range->end, mstart, mend);
- unmap_grant_pages(map,
- (mstart - map->vma->vm_start) >> PAGE_SHIFT,
- (mend - mstart) >> PAGE_SHIFT);
+ map->index, map->count, map_start, map_end,
+ range->start, range->end, mstart, mend);
+ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
+ (mend - mstart) >> PAGE_SHIFT);
return true;
}
@@ -1042,18 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
return -EINVAL;
pr_debug("map %d+%d at %lx (pgoff %lx)\n",
- index, count, vma->vm_start, vma->vm_pgoff);
+ index, count, vma->vm_start, vma->vm_pgoff);
mutex_lock(&priv->lock);
map = gntdev_find_map_index(priv, index, count);
if (!map)
goto unlock_out;
- if (use_ptemod && map->vma)
- goto unlock_out;
- if (atomic_read(&map->live_grants)) {
- err = -EAGAIN;
+ if (!atomic_add_unless(&map->in_use, 1, 1))
goto unlock_out;
- }
+
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
@@ -1074,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
map->flags |= GNTMAP_readonly;
}
+ map->pages_vm_start = vma->vm_start;
+
if (use_ptemod) {
- map->vma = vma;
err = mmu_interval_notifier_insert_locked(
&map->notifier, vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
- if (err) {
- map->vma = NULL;
+ if (err)
goto out_unlock_put;
- }
+
+ map->notifier_init = true;
}
mutex_unlock(&priv->lock);
@@ -1099,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
*/
mmu_interval_read_begin(&map->notifier);
- map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start,
find_grant_ptes, map);
@@ -1128,13 +1125,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
out_unlock_put:
mutex_unlock(&priv->lock);
out_put_map:
- if (use_ptemod) {
+ if (use_ptemod)
unmap_grant_pages(map, 0, map->count);
- if (map->vma) {
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
- }
gntdev_put_map(priv, map);
return err;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
5c13a4a0291b ("xen/gntdev: Accommodate VMA splitting")
dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
ce2f46f3531a ("xen/gntdev: fix unmap notification order")
f28347cc6639 ("Xen/gntdev: don't ignore kernel unmapping error")
30dcc56bba91 ("xen: assume XENFEAT_gnttab_map_avail_bits being set for pv guests")
970655aa9b42 ("xen/gntdev: fix gntdev_mmap() error exit path")
bce21a2b48ed ("Xen/gnttab: introduce common INVALID_GRANT_{HANDLE,REF}")
36caa3fedf06 ("Xen/gntdev: don't needlessly allocate k{,un}map_ops[]")
8310b77b48c5 ("Xen/gnttab: handle p2m update errors on a per-slot basis")
36bf1dfb8b26 ("xen/arm: don't ignore return errors from set_phys_to_machine")
ebee0eab0859 ("Xen/gntdev: correct error checking in gntdev_map_grant_pages()")
dbe5283605b3 ("Xen/gntdev: correct dev_bus_addr handling in gntdev_map_grant_pages()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5c13a4a0291b30191eff9ead8d010e1ca43a4d0c Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b(a)runbox.com>
Date: Sun, 2 Oct 2022 18:20:06 -0400
Subject: [PATCH] xen/gntdev: Accommodate VMA splitting
Prior to this commit, the gntdev driver code did not handle the
following scenario correctly with paravirtualized (PV) Xen domains:
* User process sets up a gntdev mapping composed of two grant mappings
(i.e., two pages shared by another Xen domain).
* User process munmap()s one of the pages.
* User process munmap()s the remaining page.
* User process exits.
In the scenario above, the user process would cause the kernel to log
the following messages in dmesg for the first munmap(), and the second
munmap() call would result in similar log messages:
BUG: Bad page map in process doublemap.test pte:... pmd:...
page:0000000057c97bff refcount:1 mapcount:-1 \
mapping:0000000000000000 index:0x0 pfn:...
...
page dumped because: bad pte
...
file:gntdev fault:0x0 mmap:gntdev_mmap [xen_gntdev] readpage:0x0
...
Call Trace:
<TASK>
dump_stack_lvl+0x46/0x5e
print_bad_pte.cold+0x66/0xb6
unmap_page_range+0x7e5/0xdc0
unmap_vmas+0x78/0xf0
unmap_region+0xa8/0x110
__do_munmap+0x1ea/0x4e0
__vm_munmap+0x75/0x120
__x64_sys_munmap+0x28/0x40
do_syscall_64+0x38/0x90
entry_SYSCALL_64_after_hwframe+0x61/0xcb
...
For each munmap() call, the Xen hypervisor (if built with CONFIG_DEBUG)
would print out the following and trigger a general protection fault in
the affected Xen PV domain:
(XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
(XEN) d0v... Attempt to implicitly unmap d0's grant PTE ...
As of this writing, gntdev_grant_map structure's vma field (referred to
as map->vma below) is mainly used for checking the start and end
addresses of mappings. However, with split VMAs, these may change, and
there could be more than one VMA associated with a gntdev mapping.
Hence, remove the use of map->vma and rely on map->pages_vm_start for
the original start address and on (map->count << PAGE_SHIFT) for the
original mapping size. Let the invalidate() and find_special_page()
hooks use these.
Also, given that there can be multiple VMAs associated with a gntdev
mapping, move the "mmu_interval_notifier_remove(&map->notifier)" call to
the end of gntdev_put_map, so that the MMU notifier is only removed
after the closing of the last remaining VMA.
Finally, use an atomic to prevent inadvertent gntdev mapping re-use,
instead of using the map->live_grants atomic counter and/or the map->vma
pointer (the latter of which is now removed). This prevents the
userspace from mmap()'ing (with MAP_FIXED) a gntdev mapping over the
same address range as a previously set up gntdev mapping. This scenario
can be summarized with the following call-trace, which was valid prior
to this commit:
mmap
gntdev_mmap
mmap (repeat mmap with MAP_FIXED over the same address range)
gntdev_invalidate
unmap_grant_pages (sets 'being_removed' entries to true)
gnttab_unmap_refs_async
unmap_single_vma
gntdev_mmap (maps the shared pages again)
munmap
gntdev_invalidate
unmap_grant_pages
(no-op because 'being_removed' entries are true)
unmap_single_vma (For PV domains, Xen reports that a granted page
is being unmapped and triggers a general protection fault in the
affected domain, if Xen was built with CONFIG_DEBUG)
The fix for this last scenario could be worth its own commit, but we
opted for a single commit, because removing the gntdev_grant_map
structure's vma field requires guarding the entry to gntdev_mmap(), and
the live_grants atomic counter is not sufficient on its own to prevent
the mmap() over a pre-existing mapping.
Link: https://github.com/QubesOS/qubes-issues/issues/7631
Fixes: ab31523c2fca ("xen/gntdev: allow usermode to map granted pages")
Cc: stable(a)vger.kernel.org
Signed-off-by: M. Vefa Bicakci <m.v.b(a)runbox.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Link: https://lore.kernel.org/r/20221002222006.2077-3-m.v.b@runbox.com
Signed-off-by: Juergen Gross <jgross(a)suse.com>
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 40ef379c28ab..9c286b2a1900 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
};
struct gntdev_grant_map {
+ atomic_t in_use;
struct mmu_interval_notifier notifier;
+ bool notifier_init;
struct list_head next;
- struct vm_area_struct *vma;
int index;
int count;
int flags;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eb0586b9767d..4d9a3050de6a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
*/
}
+ if (use_ptemod && map->notifier_init)
+ mmu_interval_notifier_remove(&map->notifier);
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
struct gntdev_grant_map *map = data;
- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
(1 << _GNTMAP_guest_avail0);
u64 pte_maddr;
@@ -508,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
struct gntdev_priv *priv = file->private_data;
pr_debug("gntdev_vma_close %p\n", vma);
- if (use_ptemod) {
- WARN_ON(map->vma != vma);
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
+
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
}
@@ -540,29 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
+ unsigned long map_start, map_end;
if (!mmu_notifier_range_blockable(range))
return false;
+ map_start = map->pages_vm_start;
+ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
+
/*
* If the VMA is split or otherwise changed the notifier is not
* updated, but we don't want to process VA's outside the modified
* VMA. FIXME: It would be much more understandable to just prevent
* modifying the VMA in the first place.
*/
- if (map->vma->vm_start >= range->end ||
- map->vma->vm_end <= range->start)
+ if (map_start >= range->end || map_end <= range->start)
return true;
- mstart = max(range->start, map->vma->vm_start);
- mend = min(range->end, map->vma->vm_end);
+ mstart = max(range->start, map_start);
+ mend = min(range->end, map_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end,
- range->start, range->end, mstart, mend);
- unmap_grant_pages(map,
- (mstart - map->vma->vm_start) >> PAGE_SHIFT,
- (mend - mstart) >> PAGE_SHIFT);
+ map->index, map->count, map_start, map_end,
+ range->start, range->end, mstart, mend);
+ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
+ (mend - mstart) >> PAGE_SHIFT);
return true;
}
@@ -1042,18 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
return -EINVAL;
pr_debug("map %d+%d at %lx (pgoff %lx)\n",
- index, count, vma->vm_start, vma->vm_pgoff);
+ index, count, vma->vm_start, vma->vm_pgoff);
mutex_lock(&priv->lock);
map = gntdev_find_map_index(priv, index, count);
if (!map)
goto unlock_out;
- if (use_ptemod && map->vma)
- goto unlock_out;
- if (atomic_read(&map->live_grants)) {
- err = -EAGAIN;
+ if (!atomic_add_unless(&map->in_use, 1, 1))
goto unlock_out;
- }
+
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
@@ -1074,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
map->flags |= GNTMAP_readonly;
}
+ map->pages_vm_start = vma->vm_start;
+
if (use_ptemod) {
- map->vma = vma;
err = mmu_interval_notifier_insert_locked(
&map->notifier, vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
- if (err) {
- map->vma = NULL;
+ if (err)
goto out_unlock_put;
- }
+
+ map->notifier_init = true;
}
mutex_unlock(&priv->lock);
@@ -1099,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
*/
mmu_interval_read_begin(&map->notifier);
- map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start,
find_grant_ptes, map);
@@ -1128,13 +1125,8 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
out_unlock_put:
mutex_unlock(&priv->lock);
out_put_map:
- if (use_ptemod) {
+ if (use_ptemod)
unmap_grant_pages(map, 0, map->count);
- if (map->vma) {
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
- }
gntdev_put_map(priv, map);
return err;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
0991028cd495 ("xen/gntdev: Prevent leaking grants")
166d38632316 ("xen/gntdev: Ignore failure to unmap INVALID_GRANT_HANDLE")
dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
ce2f46f3531a ("xen/gntdev: fix unmap notification order")
f28347cc6639 ("Xen/gntdev: don't ignore kernel unmapping error")
bce21a2b48ed ("Xen/gnttab: introduce common INVALID_GRANT_{HANDLE,REF}")
36caa3fedf06 ("Xen/gntdev: don't needlessly allocate k{,un}map_ops[]")
8310b77b48c5 ("Xen/gnttab: handle p2m update errors on a per-slot basis")
36bf1dfb8b26 ("xen/arm: don't ignore return errors from set_phys_to_machine")
ebee0eab0859 ("Xen/gntdev: correct error checking in gntdev_map_grant_pages()")
dbe5283605b3 ("Xen/gntdev: correct dev_bus_addr handling in gntdev_map_grant_pages()")
0102e4efda76 ("xen: Use evtchn_type_t as a type for event channels")
b3f7931f5c61 ("xen/gntdev: switch from kcalloc() to kvcalloc()")
3b06ac6707c1 ("xen/gntdev: replace global limit of mapped pages by limit per call")
d3eeb1d77c5d ("xen/gntdev: use mmu_interval_notifier_insert")
ee7f5225dc3c ("xen: Stop abusing DT of_dma_configure API")
bce5963bcb4f ("xen/events: fix binding user event channels to cpus")
dfcd66604c1c ("mm/mmu_notifier: convert user range->blockable to helper function")
a3e0d41c2b1f ("mm/hmm: improve driver API to work and wait over a range")
73231612dc7c ("mm/hmm: improve and rename hmm_vma_fault() to hmm_range_fault()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0991028cd49567d7016d1b224fe0117c35059f86 Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b(a)runbox.com>
Date: Sun, 2 Oct 2022 18:20:05 -0400
Subject: [PATCH] xen/gntdev: Prevent leaking grants
Prior to this commit, if a grant mapping operation failed partially,
some of the entries in the map_ops array would be invalid, whereas all
of the entries in the kmap_ops array would be valid. This in turn would
cause the following logic in gntdev_map_grant_pages to become invalid:
for (i = 0; i < map->count; i++) {
if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
if (!use_ptemod)
alloced++;
}
if (use_ptemod) {
if (map->kmap_ops[i].status == GNTST_okay) {
if (map->map_ops[i].status == GNTST_okay)
alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
}
}
}
...
atomic_add(alloced, &map->live_grants);
Assume that use_ptemod is true (i.e., the domain mapping the granted
pages is a paravirtualized domain). In the code excerpt above, note that
the "alloced" variable is only incremented when both kmap_ops[i].status
and map_ops[i].status are set to GNTST_okay (i.e., both mapping
operations are successful). However, as also noted above, there are
cases where a grant mapping operation fails partially, breaking the
assumption of the code excerpt above.
The aforementioned causes map->live_grants to be incorrectly set. In
some cases, all of the map_ops mappings fail, but all of the kmap_ops
mappings succeed, meaning that live_grants may remain zero. This in turn
makes it impossible to unmap the successfully grant-mapped pages pointed
to by kmap_ops, because unmap_grant_pages has the following snippet of
code at its beginning:
if (atomic_read(&map->live_grants) == 0)
return; /* Nothing to do */
In other cases where only some of the map_ops mappings fail but all
kmap_ops mappings succeed, live_grants is made positive, but when the
user requests unmapping the grant-mapped pages, __unmap_grant_pages_done
will then make map->live_grants negative, because the latter function
does not check if all of the pages that were requested to be unmapped
were actually unmapped, and the same function unconditionally subtracts
"data->count" (i.e., a value that can be greater than map->live_grants)
from map->live_grants. The side effects of a negative live_grants value
have not been studied.
The net effect of all of this is that grant references are leaked in one
of the above conditions. In Qubes OS v4.1 (which uses Xen's grant
mechanism extensively for X11 GUI isolation), this issue manifests
itself with warning messages like the following to be printed out by the
Linux kernel in the VM that had granted pages (that contain X11 GUI
window data) to dom0: "g.e. 0x1234 still pending", especially after the
user rapidly resizes GUI VM windows (causing some grant-mapping
operations to partially or completely fail, due to the fact that the VM
unshares some of the pages as part of the window resizing, making the
pages impossible to grant-map from dom0).
The fix for this issue involves counting all successful map_ops and
kmap_ops mappings separately, and then adding the sum to live_grants.
During unmapping, only the number of successfully unmapped grants is
subtracted from live_grants. The code is also modified to check for
negative live_grants values after the subtraction and warn the user.
Link: https://github.com/QubesOS/qubes-issues/issues/7631
Fixes: dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
Cc: stable(a)vger.kernel.org
Signed-off-by: M. Vefa Bicakci <m.v.b(a)runbox.com>
Acked-by: Demi Marie Obenour <demi(a)invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Link: https://lore.kernel.org/r/20221002222006.2077-2-m.v.b@runbox.com
Signed-off-by: Juergen Gross <jgross(a)suse.com>
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 84b143eef395..eb0586b9767d 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -367,8 +367,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
for (i = 0; i < map->count; i++) {
if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- if (!use_ptemod)
- alloced++;
+ alloced++;
} else if (!err)
err = -EINVAL;
@@ -377,8 +376,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
if (use_ptemod) {
if (map->kmap_ops[i].status == GNTST_okay) {
- if (map->map_ops[i].status == GNTST_okay)
- alloced++;
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
} else if (!err)
err = -EINVAL;
@@ -394,8 +392,14 @@ static void __unmap_grant_pages_done(int result,
unsigned int i;
struct gntdev_grant_map *map = data->data;
unsigned int offset = data->unmap_ops - map->unmap_ops;
+ int successful_unmaps = 0;
+ int live_grants;
for (i = 0; i < data->count; i++) {
+ if (map->unmap_ops[offset + i].status == GNTST_okay &&
+ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
+ successful_unmaps++;
+
WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("unmap handle=%d st=%d\n",
@@ -403,6 +407,10 @@ static void __unmap_grant_pages_done(int result,
map->unmap_ops[offset+i].status);
map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
if (use_ptemod) {
+ if (map->kunmap_ops[offset + i].status == GNTST_okay &&
+ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
+ successful_unmaps++;
+
WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("kunmap handle=%u st=%d\n",
@@ -411,11 +419,15 @@ static void __unmap_grant_pages_done(int result,
map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
}
}
+
/*
* Decrease the live-grant counter. This must happen after the loop to
* prevent premature reuse of the grants by gnttab_mmap().
*/
- atomic_sub(data->count, &map->live_grants);
+ live_grants = atomic_sub_return(successful_unmaps, &map->live_grants);
+ if (WARN_ON(live_grants < 0))
+ pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n",
+ __func__, live_grants, successful_unmaps);
/* Release reference taken by __unmap_grant_pages */
gntdev_put_map(NULL, map);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
0991028cd495 ("xen/gntdev: Prevent leaking grants")
166d38632316 ("xen/gntdev: Ignore failure to unmap INVALID_GRANT_HANDLE")
dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
ce2f46f3531a ("xen/gntdev: fix unmap notification order")
f28347cc6639 ("Xen/gntdev: don't ignore kernel unmapping error")
bce21a2b48ed ("Xen/gnttab: introduce common INVALID_GRANT_{HANDLE,REF}")
36caa3fedf06 ("Xen/gntdev: don't needlessly allocate k{,un}map_ops[]")
8310b77b48c5 ("Xen/gnttab: handle p2m update errors on a per-slot basis")
36bf1dfb8b26 ("xen/arm: don't ignore return errors from set_phys_to_machine")
ebee0eab0859 ("Xen/gntdev: correct error checking in gntdev_map_grant_pages()")
dbe5283605b3 ("Xen/gntdev: correct dev_bus_addr handling in gntdev_map_grant_pages()")
0102e4efda76 ("xen: Use evtchn_type_t as a type for event channels")
b3f7931f5c61 ("xen/gntdev: switch from kcalloc() to kvcalloc()")
3b06ac6707c1 ("xen/gntdev: replace global limit of mapped pages by limit per call")
d3eeb1d77c5d ("xen/gntdev: use mmu_interval_notifier_insert")
ee7f5225dc3c ("xen: Stop abusing DT of_dma_configure API")
bce5963bcb4f ("xen/events: fix binding user event channels to cpus")
dfcd66604c1c ("mm/mmu_notifier: convert user range->blockable to helper function")
a3e0d41c2b1f ("mm/hmm: improve driver API to work and wait over a range")
73231612dc7c ("mm/hmm: improve and rename hmm_vma_fault() to hmm_range_fault()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 0991028cd49567d7016d1b224fe0117c35059f86 Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b(a)runbox.com>
Date: Sun, 2 Oct 2022 18:20:05 -0400
Subject: [PATCH] xen/gntdev: Prevent leaking grants
Prior to this commit, if a grant mapping operation failed partially,
some of the entries in the map_ops array would be invalid, whereas all
of the entries in the kmap_ops array would be valid. This in turn would
cause the following logic in gntdev_map_grant_pages to become invalid:
for (i = 0; i < map->count; i++) {
if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
if (!use_ptemod)
alloced++;
}
if (use_ptemod) {
if (map->kmap_ops[i].status == GNTST_okay) {
if (map->map_ops[i].status == GNTST_okay)
alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
}
}
}
...
atomic_add(alloced, &map->live_grants);
Assume that use_ptemod is true (i.e., the domain mapping the granted
pages is a paravirtualized domain). In the code excerpt above, note that
the "alloced" variable is only incremented when both kmap_ops[i].status
and map_ops[i].status are set to GNTST_okay (i.e., both mapping
operations are successful). However, as also noted above, there are
cases where a grant mapping operation fails partially, breaking the
assumption of the code excerpt above.
The aforementioned causes map->live_grants to be incorrectly set. In
some cases, all of the map_ops mappings fail, but all of the kmap_ops
mappings succeed, meaning that live_grants may remain zero. This in turn
makes it impossible to unmap the successfully grant-mapped pages pointed
to by kmap_ops, because unmap_grant_pages has the following snippet of
code at its beginning:
if (atomic_read(&map->live_grants) == 0)
return; /* Nothing to do */
In other cases where only some of the map_ops mappings fail but all
kmap_ops mappings succeed, live_grants is made positive, but when the
user requests unmapping the grant-mapped pages, __unmap_grant_pages_done
will then make map->live_grants negative, because the latter function
does not check if all of the pages that were requested to be unmapped
were actually unmapped, and the same function unconditionally subtracts
"data->count" (i.e., a value that can be greater than map->live_grants)
from map->live_grants. The side effects of a negative live_grants value
have not been studied.
The net effect of all of this is that grant references are leaked in one
of the above conditions. In Qubes OS v4.1 (which uses Xen's grant
mechanism extensively for X11 GUI isolation), this issue manifests
itself with warning messages like the following to be printed out by the
Linux kernel in the VM that had granted pages (that contain X11 GUI
window data) to dom0: "g.e. 0x1234 still pending", especially after the
user rapidly resizes GUI VM windows (causing some grant-mapping
operations to partially or completely fail, due to the fact that the VM
unshares some of the pages as part of the window resizing, making the
pages impossible to grant-map from dom0).
The fix for this issue involves counting all successful map_ops and
kmap_ops mappings separately, and then adding the sum to live_grants.
During unmapping, only the number of successfully unmapped grants is
subtracted from live_grants. The code is also modified to check for
negative live_grants values after the subtraction and warn the user.
Link: https://github.com/QubesOS/qubes-issues/issues/7631
Fixes: dbe97cff7dd9 ("xen/gntdev: Avoid blocking in unmap_grant_pages()")
Cc: stable(a)vger.kernel.org
Signed-off-by: M. Vefa Bicakci <m.v.b(a)runbox.com>
Acked-by: Demi Marie Obenour <demi(a)invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Link: https://lore.kernel.org/r/20221002222006.2077-2-m.v.b@runbox.com
Signed-off-by: Juergen Gross <jgross(a)suse.com>
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 84b143eef395..eb0586b9767d 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -367,8 +367,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
for (i = 0; i < map->count; i++) {
if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- if (!use_ptemod)
- alloced++;
+ alloced++;
} else if (!err)
err = -EINVAL;
@@ -377,8 +376,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
if (use_ptemod) {
if (map->kmap_ops[i].status == GNTST_okay) {
- if (map->map_ops[i].status == GNTST_okay)
- alloced++;
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
} else if (!err)
err = -EINVAL;
@@ -394,8 +392,14 @@ static void __unmap_grant_pages_done(int result,
unsigned int i;
struct gntdev_grant_map *map = data->data;
unsigned int offset = data->unmap_ops - map->unmap_ops;
+ int successful_unmaps = 0;
+ int live_grants;
for (i = 0; i < data->count; i++) {
+ if (map->unmap_ops[offset + i].status == GNTST_okay &&
+ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
+ successful_unmaps++;
+
WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("unmap handle=%d st=%d\n",
@@ -403,6 +407,10 @@ static void __unmap_grant_pages_done(int result,
map->unmap_ops[offset+i].status);
map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
if (use_ptemod) {
+ if (map->kunmap_ops[offset + i].status == GNTST_okay &&
+ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
+ successful_unmaps++;
+
WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("kunmap handle=%u st=%d\n",
@@ -411,11 +419,15 @@ static void __unmap_grant_pages_done(int result,
map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
}
}
+
/*
* Decrease the live-grant counter. This must happen after the loop to
* prevent premature reuse of the grants by gnttab_mmap().
*/
- atomic_sub(data->count, &map->live_grants);
+ live_grants = atomic_sub_return(successful_unmaps, &map->live_grants);
+ if (WARN_ON(live_grants < 0))
+ pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n",
+ __func__, live_grants, successful_unmaps);
/* Release reference taken by __unmap_grant_pages */
gntdev_put_map(NULL, map);