From: Eric Biggers ebiggers@google.com
'igrab(d_inode(dentry->d_parent))' without holding dentry->d_lock is broken because without d_lock, d_parent can be concurrently changed due to a rename(). Then if the old directory is immediately deleted, old d_parent->inode can be NULL. That causes a NULL dereference in igrab().
To fix this, use dget_parent() to safely grab a reference to the parent dentry, which pins the inode. This also eliminates the need to use d_find_any_alias() other than for the initial inode, as we no longer throw away the dentry at each step.
This is an extremely hard race to hit, but it is possible. Adding a udelay() in between the reads of ->d_parent and its ->d_inode makes it reproducible on a no-journal filesystem using the following program:
#include <fcntl.h> #include <unistd.h>
int main() { if (fork()) { for (;;) { mkdir("dir1", 0700); int fd = open("dir1/file", O_RDWR|O_CREAT|O_SYNC); write(fd, "X", 1); close(fd); } } else { mkdir("dir2", 0700); for (;;) { rename("dir1/file", "dir2/file"); rmdir("dir1"); } } }
Fixes: d59729f4e794 ("ext4: fix races in ext4_sync_parent()") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers ebiggers@google.com --- fs/ext4/fsync.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-)
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e10206e7f4bbe7..093c359952cdba 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -44,30 +44,28 @@ */ static int ext4_sync_parent(struct inode *inode) { - struct dentry *dentry = NULL; - struct inode *next; + struct dentry *dentry, *next; int ret = 0;
if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) return 0; - inode = igrab(inode); + dentry = d_find_any_alias(inode); + if (!dentry) + return 0; while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); - dentry = d_find_any_alias(inode); - if (!dentry) - break; - next = igrab(d_inode(dentry->d_parent)); + + next = dget_parent(dentry); dput(dentry); - if (!next) - break; - iput(inode); - inode = next; + dentry = next; + inode = dentry->d_inode; + /* * The directory inode may have gone through rmdir by now. But * the inode itself and its blocks are still allocated (we hold - * a reference to the inode so it didn't go through - * ext4_evict_inode()) and so we are safe to flush metadata - * blocks and the inode. + * a reference to the inode via its dentry), so it didn't go + * through ext4_evict_inode()) and so we are safe to flush + * metadata blocks and the inode. */ ret = sync_mapping_buffers(inode->i_mapping); if (ret) @@ -76,7 +74,7 @@ static int ext4_sync_parent(struct inode *inode) if (ret) break; } - iput(inode); + dput(dentry); return ret; }