unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this lead to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages. I also
believe this is responsible for various deadlocks I have experienced in
the past.
Avoid these problems by making unmap_grant_pages async. This requires
making it return void, as any errors will not be available when the
function returns. Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected. Additionally, a failed call will not prevent further calls
from being made, but this is harmless.
Because unmap_grant_pages is now async, the grant handle will be sent to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle. Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile. Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.
It is also necessary to guard against reentrancy in gntdev_map_put(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.
Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable(a)vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi(a)invisiblethingslab.com>
---
drivers/xen/gntdev-common.h | 7 ++
drivers/xen/gntdev.c | 157 ++++++++++++++++++++++++------------
2 files changed, 113 insertions(+), 51 deletions(-)
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 20d7d059dadb..40ef379c28ab 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -16,6 +16,7 @@
#include <linux/mmu_notifier.h>
#include <linux/types.h>
#include <xen/interface/event_channel.h>
+#include <xen/grant_table.h>
struct gntdev_dmabuf_priv;
@@ -56,6 +57,7 @@ struct gntdev_grant_map {
struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops;
struct gnttab_unmap_grant_ref *kunmap_ops;
+ bool *being_removed;
struct page **pages;
unsigned long pages_vm_start;
@@ -73,6 +75,11 @@ struct gntdev_grant_map {
/* Needed to avoid allocation in gnttab_dma_free_pages(). */
xen_pfn_t *frames;
#endif
+
+ /* Number of live grants */
+ atomic_t live_grants;
+ /* Needed to avoid allocation in __unmap_grant_pages */
+ struct gntab_unmap_queue_data unmap_data;
};
struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 59ffea800079..4b56c39f766d 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -35,6 +35,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>
+#include <linux/workqueue.h>
#include <xen/xen.h>
#include <xen/grant_table.h>
@@ -60,10 +61,11 @@ module_param(limit, uint, 0644);
MODULE_PARM_DESC(limit,
"Maximum number of grants that may be mapped by one mapping request");
+/* True in PV mode, false otherwise */
static int use_ptemod;
-static int unmap_grant_pages(struct gntdev_grant_map *map,
- int offset, int pages);
+static void unmap_grant_pages(struct gntdev_grant_map *map,
+ int offset, int pages);
static struct miscdevice gntdev_miscdev;
@@ -120,6 +122,7 @@ static void gntdev_free_map(struct gntdev_grant_map *map)
kvfree(map->unmap_ops);
kvfree(map->kmap_ops);
kvfree(map->kunmap_ops);
+ kvfree(map->being_removed);
kfree(map);
}
@@ -140,10 +143,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
GFP_KERNEL);
add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->being_removed =
+ kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
- NULL == add->pages)
+ NULL == add->pages ||
+ NULL == add->being_removed)
goto err;
if (use_ptemod) {
add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
@@ -250,9 +256,36 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
if (!refcount_dec_and_test(&map->users))
return;
- if (map->pages && !use_ptemod)
+ if (map->pages && !use_ptemod) {
+ /*
+ * Increment the reference count. This ensures that the
+ * subsequent call to unmap_grant_pages() will not wind up
+ * re-entering itself. It *can* wind up calling
+ * gntdev_put_map() recursively, but such calls will be with a
+ * reference count greater than 1, so they will return before
+ * this code is reached. The recursion depth is thus limited to
+ * 1. Do NOT use refcount_inc() here, as it will detect that
+ * the reference count is zero and WARN().
+ */
+ refcount_set(&map->users, 1);
+
+ /*
+ * Unmap the grants. This may or may not be asynchronous, so it
+ * is possible that the reference count is 1 on return, but it
+ * could also be greater than 1.
+ */
unmap_grant_pages(map, 0, map->count);
+ /* Check if the memory now needs to be freed */
+ if (!refcount_dec_and_test(&map->users))
+ return;
+
+ /*
+ * All pages have been returned to the hypervisor, so free the
+ * map.
+ */
+ }
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
@@ -283,6 +316,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{
+ size_t alloced = 0;
int i, err = 0;
if (!use_ptemod) {
@@ -331,97 +365,116 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
map->count);
for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status == GNTST_okay)
+ if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- else if (!err)
+ if (!use_ptemod)
+ alloced++;
+ } else if (!err)
err = -EINVAL;
if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
if (use_ptemod) {
- if (map->kmap_ops[i].status == GNTST_okay)
+ if (map->kmap_ops[i].status == GNTST_okay) {
+ if (map->map_ops[i].status == GNTST_okay)
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
- else if (!err)
+ } else if (!err)
err = -EINVAL;
}
}
+ atomic_add(alloced, &map->live_grants);
return err;
}
-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void __unmap_grant_pages_done(int result,
+ struct gntab_unmap_queue_data *data)
{
- int i, err = 0;
- struct gntab_unmap_queue_data unmap_data;
-
- if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
- int pgno = (map->notify.addr >> PAGE_SHIFT);
- if (pgno >= offset && pgno < offset + pages) {
- /* No need for kmap, pages are in lowmem */
- uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
- tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
- map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
- }
- }
-
- unmap_data.unmap_ops = map->unmap_ops + offset;
- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
- unmap_data.pages = map->pages + offset;
- unmap_data.count = pages;
-
- err = gnttab_unmap_refs_sync(&unmap_data);
- if (err)
- return err;
+ unsigned int i;
+ struct gntdev_grant_map *map = data->data;
+ unsigned int offset = data->unmap_ops - map->unmap_ops;
- for (i = 0; i < pages; i++) {
- if (map->unmap_ops[offset+i].status)
- err = -EINVAL;
+ for (i = 0; i < data->count; i++) {
+ WARN_ON(map->unmap_ops[offset+i].status);
pr_debug("unmap handle=%d st=%d\n",
map->unmap_ops[offset+i].handle,
map->unmap_ops[offset+i].status);
map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
if (use_ptemod) {
- if (map->kunmap_ops[offset+i].status)
- err = -EINVAL;
+ WARN_ON(map->kunmap_ops[offset+i].status);
pr_debug("kunmap handle=%u st=%d\n",
map->kunmap_ops[offset+i].handle,
map->kunmap_ops[offset+i].status);
map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
}
}
- return err;
+ /*
+ * Decrease the live-grant counter. This must happen after the loop to
+ * prevent premature reuse of the grants by gnttab_mmap().
+ */
+ atomic_sub(data->count, &map->live_grants);
+
+ /* Release reference taken by __unmap_grant_pages */
+ gntdev_put_map(NULL, map);
+}
+
+static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
+{
+ if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+ int pgno = (map->notify.addr >> PAGE_SHIFT);
+
+ if (pgno >= offset && pgno < offset + pages) {
+ /* No need for kmap, pages are in lowmem */
+ uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
+ tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+ map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+ }
+ }
+
+ map->unmap_data.unmap_ops = map->unmap_ops + offset;
+ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+ map->unmap_data.pages = map->pages + offset;
+ map->unmap_data.count = pages;
+ map->unmap_data.done = __unmap_grant_pages_done;
+ map->unmap_data.data = map;
+ refcount_inc(&map->users); /* to keep map alive during async call below */
+
+ gnttab_unmap_refs_async(&map->unmap_data);
}
-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
{
- int range, err = 0;
+ int range;
+
+ if (atomic_read(&map->live_grants) == 0)
+ return; /* Nothing to do */
pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
/* It is possible the requested range will have a "hole" where we
* already unmapped some of the grants. Only unmap valid ranges.
*/
- while (pages && !err) {
- while (pages &&
- map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
+ while (pages) {
+ while (pages && map->being_removed[offset]) {
offset++;
pages--;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset + range].handle ==
- INVALID_GRANT_HANDLE)
+ if (map->being_removed[offset + range])
break;
+ map->being_removed[offset + range] = true;
range++;
}
- err = __unmap_grant_pages(map, offset, range);
+ if (range)
+ __unmap_grant_pages(map, offset, range);
offset += range;
pages -= range;
}
-
- return err;
}
/* ------------------------------------------------------------------ */
@@ -473,7 +526,6 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
- int err;
if (!mmu_notifier_range_blockable(range))
return false;
@@ -494,10 +546,9 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
range->start, range->end, mstart, mend);
- err = unmap_grant_pages(map,
+ unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
- WARN_ON(err);
return true;
}
@@ -985,6 +1036,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
if (use_ptemod && map->vma)
goto unlock_out;
+ if (atomic_read(&map->live_grants)) {
+ err = -EAGAIN;
+ goto unlock_out;
+ }
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
--
2.36.1
test_bit(), as any other bitmap op, takes `unsigned long *` as a
second argument (pointer to the actual bitmap), as any bitmap
itself is an array of unsigned longs. However, the ia64_get_irr()
code passes a ref to `u64` as a second argument.
This works with the ia64 bitops implementation due to that they
have `void *` as the second argument and then cast it later on.
This works with the bitmap API itself due to that `unsigned long`
has the same size on ia64 as `u64` (`unsigned long long`), but
from the compiler PoV those two are different.
Define @irr as `unsigned long` to fix that. That implies no
functional changes. Has been hidden for 16 years!
Fixes: a58786917ce2 ("[IA64] avoid broken SAL_CACHE_FLUSH implementations")
Cc: stable(a)vger.kernel.org # 2.6.16+
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Alexander Lobakin <alexandr.lobakin(a)intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Reviewed-by: Yury Norov <yury.norov(a)gmail.com>
---
arch/ia64/include/asm/processor.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 7cbce290f4e5..757c2f6d8d4b 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -538,7 +538,7 @@ ia64_get_irr(unsigned int vector)
{
unsigned int reg = vector / 64;
unsigned int bit = vector % 64;
- u64 irr;
+ unsigned long irr;
switch (reg) {
case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break;
--
2.36.1
This is a note to let you know that I've just added the patch titled
usb: chipidea: udc: check request status before setting device
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From b24346a240b36cfc4df194d145463874985aa29b Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2(a)nxp.com>
Date: Thu, 23 Jun 2022 11:02:42 +0800
Subject: usb: chipidea: udc: check request status before setting device
address
The complete() function may be called even though request is not
completed. In this case, it's necessary to check request status so
as not to set device address wrongly.
Fixes: 10775eb17bee ("usb: chipidea: udc: update gadget states according to ch9")
cc: <stable(a)vger.kernel.org>
Signed-off-by: Xu Yang <xu.yang_2(a)nxp.com>
Link: https://lore.kernel.org/r/20220623030242.41796-1-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/chipidea/udc.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index dc6c96e04bcf..3b8bf6daf7d0 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -1048,6 +1048,9 @@ isr_setup_status_complete(struct usb_ep *ep, struct usb_request *req)
struct ci_hdrc *ci = req->context;
unsigned long flags;
+ if (req->status < 0)
+ return;
+
if (ci->setaddr) {
hw_usb_set_address(ci, ci->address);
ci->setaddr = false;
--
2.36.1
This is a note to let you know that I've just added the patch titled
USB: gadget: Fix double-free bug in raw_gadget driver
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 90bc2af24638659da56397ff835f3c95a948f991 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Wed, 22 Jun 2022 10:46:31 -0400
Subject: USB: gadget: Fix double-free bug in raw_gadget driver
Re-reading a recently merged fix to the raw_gadget driver showed that
it inadvertently introduced a double-free bug in a failure pathway.
If raw_ioctl_init() encounters an error after the driver ID number has
been allocated, it deallocates the ID number before returning. But
when dev_free() runs later on, it will then try to deallocate the ID
number a second time.
Closely related to this issue is another error in the recent fix: The
ID number is stored in the raw_dev structure before the code checks to
see whether the structure has already been initialized, in which case
the new ID number would overwrite the earlier value.
The solution to both bugs is to keep the new ID number in a local
variable, and store it in the raw_dev structure only after the check
for prior initialization. No errors can occur after that point, so
the double-free will never happen.
Fixes: f2d8c2606825 ("usb: gadget: Fix non-unique driver names in raw-gadget driver")
CC: Andrey Konovalov <andreyknvl(a)gmail.com>
CC: <stable(a)vger.kernel.org>
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Link: https://lore.kernel.org/r/YrMrRw5AyIZghN0v@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/gadget/legacy/raw_gadget.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c
index 5c8481cef35f..2acece16b890 100644
--- a/drivers/usb/gadget/legacy/raw_gadget.c
+++ b/drivers/usb/gadget/legacy/raw_gadget.c
@@ -430,6 +430,7 @@ static int raw_release(struct inode *inode, struct file *fd)
static int raw_ioctl_init(struct raw_dev *dev, unsigned long value)
{
int ret = 0;
+ int driver_id_number;
struct usb_raw_init arg;
char *udc_driver_name;
char *udc_device_name;
@@ -452,10 +453,9 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value)
return -EINVAL;
}
- ret = ida_alloc(&driver_id_numbers, GFP_KERNEL);
- if (ret < 0)
- return ret;
- dev->driver_id_number = ret;
+ driver_id_number = ida_alloc(&driver_id_numbers, GFP_KERNEL);
+ if (driver_id_number < 0)
+ return driver_id_number;
driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL);
if (!driver_driver_name) {
@@ -463,7 +463,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value)
goto out_free_driver_id_number;
}
snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX,
- DRIVER_NAME ".%d", dev->driver_id_number);
+ DRIVER_NAME ".%d", driver_id_number);
udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL);
if (!udc_driver_name) {
@@ -507,6 +507,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value)
dev->driver.driver.name = driver_driver_name;
dev->driver.udc_name = udc_device_name;
dev->driver.match_existing_only = 1;
+ dev->driver_id_number = driver_id_number;
dev->state = STATE_DEV_INITIALIZED;
spin_unlock_irqrestore(&dev->lock, flags);
@@ -521,7 +522,7 @@ static int raw_ioctl_init(struct raw_dev *dev, unsigned long value)
out_free_driver_driver_name:
kfree(driver_driver_name);
out_free_driver_id_number:
- ida_free(&driver_id_numbers, dev->driver_id_number);
+ ida_free(&driver_id_numbers, driver_id_number);
return ret;
}
--
2.36.1
On Wed, May 11, 2022 at 10:02 PM Amir Goldstein <amir73il(a)gmail.com> wrote:
>
> The logic for handling events on child in groups that have a mark on
> the parent inode, but without FS_EVENT_ON_CHILD flag in the mask is
> duplicated in several places and inconsistent.
>
> Move the logic into the preparation of mark type iterator, so that the
> parent mark type will be excluded from all mark type iterations in that
> case.
>
> This results in several subtle changes of behavior, hopefully all
> desired changes of behavior, for example:
>
> - Group A has a mount mark with FS_MODIFY in mask
> - Group A has a mark with ignore mask that does not survive FS_MODIFY
> and does not watch children on directory D.
> - Group B has a mark with FS_MODIFY in mask that does watch children
> on directory D.
> - FS_MODIFY event on file D/foo should not clear the ignore mask of
> group A, but before this change it does
>
> And if group A ignore mask was set to survive FS_MODIFY:
> - FS_MODIFY event on file D/foo should be reported to group A on account
> of the mount mark, but before this change it is wrongly ignored
>
> Fixes: 2f02fd3fa13e ("fanotify: fix ignore mask logic for events on child and on dir")
> Reported-by: Jan Kara <jack(a)suse.com>
> Link: https://lore.kernel.org/linux-fsdevel/20220314113337.j7slrb5srxukztje@quack…
> Signed-off-by: Amir Goldstein <amir73il(a)gmail.com>
> ---
Greg,
FYI, this needs the previous commit to apply to 5.18.y:
e730558adffb fsnotify: consistent behavior for parent not watching children
14362a254179 fsnotify: introduce mark type iterator
They won't apply to earlier versions and this is a fix for a very minor bug
that existed forever, so no need to bother.
Thanks,
Amir.