The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id, to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 55e56f06ed71d9441f3abd5b1d3c1a870812b3fe Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 27 Nov 2018 13:16:34 -0800
Subject: [PATCH] dax: Don't access a freed inode
After we drop the i_pages lock, the inode can be freed at any time. The get_unlocked_entry() code has no choice but to reacquire the lock, so it can't be used here. Create a new wait_entry_unlocked() which takes care not to acquire the lock or dereference the address_space in any way.
Fixes: c2a7d2a11552 ("filesystem-dax: Introduce dax_lock_mapping_entry()")
Cc: stable@vger.kernel.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
diff --git a/fs/dax.c b/fs/dax.c
index e69fc231833b..3f592dc18d67 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -232,6 +232,34 @@ static void *get_unlocked_entry(struct xa_state *xas)
 	}
 }
 
+/*
+ * The only thing keeping the address space around is the i_pages lock
+ * (it's cycled in clear_inode() after removing the entries from i_pages)
+ * After we call xas_unlock_irq(), we cannot touch xas->xa.
+ */
+static void wait_entry_unlocked(struct xa_state *xas, void *entry)
+{
+	struct wait_exceptional_entry_queue ewait;
+	wait_queue_head_t *wq;
+
+	init_wait(&ewait.wait);
+	ewait.wait.func = wake_exceptional_entry_func;
+
+	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
+	prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
+	xas_unlock_irq(xas);
+	schedule();
+	finish_wait(wq, &ewait.wait);
+
+	/*
+	 * Entry lock waits are exclusive. Wake up the next waiter since
+	 * we aren't sure we will acquire the entry lock and thus wake
+	 * the next waiter up on unlock.
+	 */
+	if (waitqueue_active(wq))
+		__wake_up(wq, TASK_NORMAL, 1, &ewait.key);
+}
+
 static void put_unlocked_entry(struct xa_state *xas, void *entry)
 {
 	/* If we were the only waiter woken, wake the next one */
@@ -389,9 +417,7 @@ bool dax_lock_mapping_entry(struct page *page)
 		entry = xas_load(&xas);
 		if (dax_is_locked(entry)) {
 			rcu_read_unlock();
-			entry = get_unlocked_entry(&xas);
-			xas_unlock_irq(&xas);
-			put_unlocked_entry(&xas, entry);
+			wait_entry_unlocked(&xas, entry);
 			rcu_read_lock();
 			continue;
 		}
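For readers who don't track fs/dax.c, the shape of the fix may be easier to see outside the kernel. What follows is a minimal userspace sketch of the wait_entry_unlocked() pattern, built on pthreads; every name in it is illustrative, and it is an analogue, not kernel code. The property it demonstrates is the one the commit message relies on: the waitqueue lives in global storage (the kernel hashes entries onto a fixed set of waitqueues via dax_entry_waitqueue()), so the sleeper never needs to touch the entry, or retake the lock that pins it, once that lock has been dropped.

#include <pthread.h>
#include <stdbool.h>

/* Global waitqueue, standing in for the kernel's hashed DAX waitqueues.
 * It does not live inside the object being waited on, so it remains
 * valid even after that object is freed. */
static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wq      = PTHREAD_COND_INITIALIZER;

/* Stands in for the i_pages lock: the only thing pinning the entry. */
static pthread_mutex_t pages_lock = PTHREAD_MUTEX_INITIALIZER;

struct entry {
	bool locked;	/* analogue of the DAX entry lock bit */
};

/*
 * Analogue of wait_entry_unlocked(): called with pages_lock held and
 * *e locked by someone else.  We queue on the global waitqueue before
 * dropping pages_lock, then sleep -- and never dereference e or retake
 * pages_lock afterwards, because the entry (in the kernel's case, the
 * whole inode) may be freed the moment pages_lock is released.
 */
static void wait_entry_unlocked_sketch(struct entry *e)
{
	(void)e;			/* deliberately unused from here on */
	pthread_mutex_lock(&wq_lock);	/* register on the queue first */
	pthread_mutex_unlock(&pages_lock);
	pthread_cond_wait(&wq, &wq_lock);
	pthread_mutex_unlock(&wq_lock);
	/*
	 * Mirror of the exclusive-wait hand-off in the patch: this
	 * sleeper may never take the entry lock itself, so it must
	 * pass the wakeup along rather than swallow it.
	 */
	pthread_cond_signal(&wq);
}

/* Unlock side: wakers take wq_lock, so a wakeup cannot be lost in the
 * window between dropping pages_lock and blocking in cond_wait above. */
static void unlock_entry_sketch(struct entry *e)
{
	pthread_mutex_lock(&pages_lock);
	e->locked = false;
	pthread_mutex_unlock(&pages_lock);
	pthread_mutex_lock(&wq_lock);
	pthread_cond_signal(&wq);
	pthread_mutex_unlock(&wq_lock);
}

The closing pthread_cond_signal() corresponds to the __wake_up(wq, TASK_NORMAL, 1, &ewait.key) above: exclusive waits wake a single sleeper, so a sleeper that may not go on to take (and later unlock) the entry lock has to forward the wakeup to the next waiter.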
On Tue, Dec 11, 2018 at 03:00:09PM +0100, gregkh@linuxfoundation.org wrote:
I have only compile-tested this backport. Dan, do you want to run it through some tests before Greg applies it?
From e01d37913b5577acaa2e8e35200081eae2795087 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 11 Dec 2018 10:16:45 -0500
Subject: [PATCH 2/2] dax: Don't access a freed inode
commit 55e56f06ed71d9441f3abd5b1d3c1a870812b3fe upstream.
After we drop the i_pages lock, the inode can be freed at any time. The get_unlocked_entry() code has no choice but to reacquire the lock, so it can't be used here. Create a new wait_entry_unlocked() which takes care not to acquire the lock or dereference the address_space in any way.
Fixes: c2a7d2a11552 ("filesystem-dax: Introduce dax_lock_mapping_entry()")
Cc: stable@vger.kernel.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 69 ++++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 37 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 3a2682a6c832..415605fafaeb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -229,8 +229,8 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
  *
  * Must be called with the i_pages lock held.
  */
-static void *__get_unlocked_mapping_entry(struct address_space *mapping,
-		pgoff_t index, void ***slotp, bool (*wait_fn)(void))
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+		pgoff_t index, void ***slotp)
 {
 	void *entry, **slot;
 	struct wait_exceptional_entry_queue ewait;
@@ -240,8 +240,6 @@ static void *__get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
-		bool revalidate;
-
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
@@ -256,30 +254,39 @@ static void *__get_unlocked_mapping_entry(struct address_space *mapping,
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
 		xa_unlock_irq(&mapping->i_pages);
-		revalidate = wait_fn();
+		schedule();
 		finish_wait(wq, &ewait.wait);
 		xa_lock_irq(&mapping->i_pages);
-		if (revalidate) {
-			put_unlocked_mapping_entry(mapping, index, entry);
-			return ERR_PTR(-EAGAIN);
-		}
 	}
 }
 
-static bool entry_wait(void)
+/*
+ * The only thing keeping the address space around is the i_pages lock
+ * (it's cycled in clear_inode() after removing the entries from i_pages)
+ * After we call xas_unlock_irq(), we cannot touch xas->xa.
+ */
+static void wait_entry_unlocked(struct address_space *mapping, pgoff_t index,
+		void ***slotp, void *entry)
 {
+	struct wait_exceptional_entry_queue ewait;
+	wait_queue_head_t *wq;
+
+	init_wait(&ewait.wait);
+	ewait.wait.func = wake_exceptional_entry_func;
+
+	wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
+	prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
+	xa_unlock_irq(&mapping->i_pages);
 	schedule();
+	finish_wait(wq, &ewait.wait);
+
 	/*
-	 * Never return an ERR_PTR() from
-	 * __get_unlocked_mapping_entry(), just keep looping.
+	 * Entry lock waits are exclusive. Wake up the next waiter since
+	 * we aren't sure we will acquire the entry lock and thus wake
+	 * the next waiter up on unlock.
 	 */
-	return false;
-}
-
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
-		pgoff_t index, void ***slotp)
-{
-	return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+	if (waitqueue_active(wq))
+		__wake_up(wq, TASK_NORMAL, 1, &ewait.key);
 }
 
 static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
@@ -398,19 +405,6 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
-static bool entry_wait_revalidate(void)
-{
-	rcu_read_unlock();
-	schedule();
-	rcu_read_lock();
-
-	/*
-	 * Tell __get_unlocked_mapping_entry() to take a break, we need
-	 * to revalidate page->mapping after dropping locks
-	 */
-	return true;
-}
-
 bool dax_lock_mapping_entry(struct page *page)
 {
 	pgoff_t index;
@@ -446,14 +440,15 @@ bool dax_lock_mapping_entry(struct page *page)
 		}
 		index = page->index;
 
-		entry = __get_unlocked_mapping_entry(mapping, index, &slot,
-				entry_wait_revalidate);
+		entry = __radix_tree_lookup(&mapping->i_pages, index,
+				NULL, &slot);
 		if (!entry) {
 			xa_unlock_irq(&mapping->i_pages);
 			break;
-		} else if (IS_ERR(entry)) {
-			xa_unlock_irq(&mapping->i_pages);
-			WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+		} else if (slot_locked(mapping, slot)) {
+			rcu_read_unlock();
+			wait_entry_unlocked(mapping, index, &slot, entry);
+			rcu_read_lock();
 			continue;
 		}
 		lock_slot(mapping, slot);
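Continuing the userspace sketch from earlier in the thread (again, all names are illustrative, and lookup_entry() is a hypothetical helper, not kernel API): the caller-side rule that both versions of the fix depend on is that the entry pointer is dead once the wait returns, so each pass of the loop looks the entry up again from scratch, just as dax_lock_mapping_entry() redoes the __radix_tree_lookup() on every iteration here.

/* Trivial single-entry "table" so the sketch is self-contained; a real
 * implementation would map key to an entry, as the kernel's radix tree
 * lookup does. */
static struct entry the_entry;

static struct entry *lookup_entry(long key)
{
	(void)key;
	return &the_entry;	/* caller must hold pages_lock */
}

static bool lock_entry_sketch(long key)
{
	for (;;) {
		pthread_mutex_lock(&pages_lock);
		struct entry *e = lookup_entry(key);
		if (!e) {
			pthread_mutex_unlock(&pages_lock);
			return false;	/* entry vanished while we slept */
		}
		if (!e->locked) {
			e->locked = true;	/* analogue of lock_slot() */
			pthread_mutex_unlock(&pages_lock);
			return true;
		}
		/* Drops pages_lock and sleeps; e must not be used again,
		 * and a spurious wakeup just costs one more lookup. */
		wait_entry_unlocked_sketch(e);
	}
}

Note what the loop does not do: it never caches e across the wait, and it never assumes the wakeup means the lock is now free; it simply revalidates everything from the top.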
On Thu, Dec 13, 2018 at 11:14 PM Greg KH <gregkh@linuxfoundation.org> wrote:
Tested-by: Dan Williams <dan.j.williams@intel.com>
However, on the upstream version of this fix Linus pointed out an additional fixup. Once that goes upstream [1], I'll circle back and provide a fixed-up version of this backport to keep -stable and mainline more closely aligned.
[1]: https://marc.info/?l=linux-fsdevel&m=154542184202064&w=2
On Sat, Dec 22, 2018 at 11:04 AM Sasha Levin <sashal@kernel.org> wrote:
Yes, I'll send a replacement once mainline has that change.