The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 31747eda41ef3c30c09c5c096b380bf54013746a Mon Sep 17 00:00:00 2001
From: Amir Goldstein amir73il@gmail.com Date: Sun, 14 Jan 2018 18:35:40 +0200 Subject: [PATCH] ovl: hash directory inodes for fsnotify
fsnotify pins a watched directory inode in cache, but if directory dentry is released, new lookup will allocate a new dentry and a new inode. Directory events will be notified on the new inode, while fsnotify listener is watching the old pinned inode.
Hash all directory inodes to reuse the pinned inode on lookup. Pure upper dirs are hashes by real upper inode, merge and lower dirs are hashed by real lower inode.
The reference to lower inode was being held by the lower dentry object in the overlay dentry (oe->lowerstack[0]). Releasing the overlay dentry may drop lower inode refcount to zero. Add a refcount on behalf of the overlay inode to prevent that.
As a by-product, hashing directory inodes also detects multiple redirected dirs to the same lower dir and uncovered redirected dir target on and returns -ESTALE on lookup.
The reported issue dates back to initial version of overlayfs, but this patch depends on ovl_inode code that was introduced in kernel v4.13.
Cc: stable@vger.kernel.org #v4.13 Reported-by: Niklas Cassel niklas.cassel@axis.com Signed-off-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Miklos Szeredi mszeredi@redhat.com Tested-by: Niklas Cassel niklas.cassel@axis.com
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 00b6b294272a..94d2f8a8b779 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -606,6 +606,16 @@ static int ovl_inode_set(struct inode *inode, void *data) static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, struct dentry *upperdentry) { + if (S_ISDIR(inode->i_mode)) { + /* Real lower dir moved to upper layer under us? */ + if (!lowerdentry && ovl_inode_lower(inode)) + return false; + + /* Lookup of an uncovered redirect origin? */ + if (!upperdentry && ovl_inode_upper(inode)) + return false; + } + /* * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. * This happens when finding a copied up overlay inode for a renamed @@ -633,6 +643,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, struct inode *inode; /* Already indexed or could be indexed on copy up? */ bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry)); + struct dentry *origin = indexed ? lowerdentry : NULL; + bool is_dir;
if (WARN_ON(upperdentry && indexed && !lowerdentry)) return ERR_PTR(-EIO); @@ -641,15 +653,19 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, realinode = d_inode(lowerdentry);
/* - * Copy up origin (lower) may exist for non-indexed upper, but we must - * not use lower as hash key in that case. - * Hash inodes that are or could be indexed by origin inode and - * non-indexed upper inodes that could be hard linked by upper inode. + * Copy up origin (lower) may exist for non-indexed non-dir upper, but + * we must not use lower as hash key in that case. + * Hash non-dir that is or could be indexed by origin inode. + * Hash dir that is or could be merged by origin inode. + * Hash pure upper and non-indexed non-dir by upper inode. */ - if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) { - struct inode *key = d_inode(indexed ? lowerdentry : - upperdentry); - unsigned int nlink; + is_dir = S_ISDIR(realinode->i_mode); + if (is_dir) + origin = lowerdentry; + + if (upperdentry || origin) { + struct inode *key = d_inode(origin ?: upperdentry); + unsigned int nlink = is_dir ? 1 : realinode->i_nlink;
inode = iget5_locked(dentry->d_sb, (unsigned long) key, ovl_inode_test, ovl_inode_set, key); @@ -670,8 +686,9 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, goto out; }
- nlink = ovl_get_nlink(lowerdentry, upperdentry, - realinode->i_nlink); + /* Recalculate nlink for non-dir due to indexing */ + if (!is_dir) + nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); } else { inode = new_inode(dentry->d_sb); @@ -685,7 +702,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, ovl_set_flag(OVL_IMPURE, inode);
/* Check for non-merge dir that may have whiteouts */ - if (S_ISDIR(realinode->i_mode)) { + if (is_dir) { struct ovl_entry *oe = dentry->d_fsdata;
if (((upperdentry && lowerdentry) || oe->numlower > 1) || diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 76440feb79f6..1a436fa92a04 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -211,6 +211,7 @@ static void ovl_destroy_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode);
dput(oi->__upperdentry); + iput(oi->lower); kfree(oi->redirect); ovl_dir_cache_free(inode); mutex_destroy(&oi->lock); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index d6bb1c9f5e7a..06119f34a69d 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -257,7 +257,7 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, if (upperdentry) OVL_I(inode)->__upperdentry = upperdentry; if (lowerdentry) - OVL_I(inode)->lower = d_inode(lowerdentry); + OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode); } @@ -273,7 +273,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) */ smp_wmb(); OVL_I(inode)->__upperdentry = upperdentry; - if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) { + if (inode_unhashed(inode)) { inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); }
On Thu, Feb 15, 2018 at 4:31 PM, gregkh@linuxfoundation.org wrote:
The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
Hi Niklas,
The conflict resolution of this patch for v4.14 is trivial (just dropping last hunk of inode.c patch), but I no means to test this right now.
Are you able and/or interested to test the attached backport path for stable kernel v4.14?
I have posted an LTP test (inotify07) to test this fix if anyone else is interested in testing: https://github.com/linux-test-project/ltp/pull/246
Thanks, Amir.
------------------ original commit in Linus's tree ------------------
From 31747eda41ef3c30c09c5c096b380bf54013746a Mon Sep 17 00:00:00 2001 From: Amir Goldstein amir73il@gmail.com Date: Sun, 14 Jan 2018 18:35:40 +0200 Subject: [PATCH] ovl: hash directory inodes for fsnotify
fsnotify pins a watched directory inode in cache, but if directory dentry is released, new lookup will allocate a new dentry and a new inode. Directory events will be notified on the new inode, while fsnotify listener is watching the old pinned inode.
Hash all directory inodes to reuse the pinned inode on lookup. Pure upper dirs are hashes by real upper inode, merge and lower dirs are hashed by real lower inode.
The reference to lower inode was being held by the lower dentry object in the overlay dentry (oe->lowerstack[0]). Releasing the overlay dentry may drop lower inode refcount to zero. Add a refcount on behalf of the overlay inode to prevent that.
As a by-product, hashing directory inodes also detects multiple redirected dirs to the same lower dir and uncovered redirected dir target on and returns -ESTALE on lookup.
The reported issue dates back to initial version of overlayfs, but this patch depends on ovl_inode code that was introduced in kernel v4.13.
Cc: stable@vger.kernel.org #v4.13 Reported-by: Niklas Cassel niklas.cassel@axis.com Signed-off-by: Amir Goldstein amir73il@gmail.com Signed-off-by: Miklos Szeredi mszeredi@redhat.com Tested-by: Niklas Cassel niklas.cassel@axis.com
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 00b6b294272a..94d2f8a8b779 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -606,6 +606,16 @@ static int ovl_inode_set(struct inode *inode, void *data) static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, struct dentry *upperdentry) {
if (S_ISDIR(inode->i_mode)) {
/* Real lower dir moved to upper layer under us? */
if (!lowerdentry && ovl_inode_lower(inode))
return false;
/* Lookup of an uncovered redirect origin? */
if (!upperdentry && ovl_inode_upper(inode))
return false;
}
/* * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. * This happens when finding a copied up overlay inode for a renamed
@@ -633,6 +643,8 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, struct inode *inode; /* Already indexed or could be indexed on copy up? */ bool indexed = (index || (ovl_indexdir(dentry->d_sb) && !upperdentry));
struct dentry *origin = indexed ? lowerdentry : NULL;
bool is_dir; if (WARN_ON(upperdentry && indexed && !lowerdentry)) return ERR_PTR(-EIO);
@@ -641,15 +653,19 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, realinode = d_inode(lowerdentry);
/*
* Copy up origin (lower) may exist for non-indexed upper, but we must
* not use lower as hash key in that case.
* Hash inodes that are or could be indexed by origin inode and
* non-indexed upper inodes that could be hard linked by upper inode.
* Copy up origin (lower) may exist for non-indexed non-dir upper, but
* we must not use lower as hash key in that case.
* Hash non-dir that is or could be indexed by origin inode.
* Hash dir that is or could be merged by origin inode.
* Hash pure upper and non-indexed non-dir by upper inode. */
if (!S_ISDIR(realinode->i_mode) && (upperdentry || indexed)) {
struct inode *key = d_inode(indexed ? lowerdentry :
upperdentry);
unsigned int nlink;
is_dir = S_ISDIR(realinode->i_mode);
if (is_dir)
origin = lowerdentry;
if (upperdentry || origin) {
struct inode *key = d_inode(origin ?: upperdentry);
unsigned int nlink = is_dir ? 1 : realinode->i_nlink; inode = iget5_locked(dentry->d_sb, (unsigned long) key, ovl_inode_test, ovl_inode_set, key);
@@ -670,8 +686,9 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, goto out; }
nlink = ovl_get_nlink(lowerdentry, upperdentry,
realinode->i_nlink);
/* Recalculate nlink for non-dir due to indexing */
if (!is_dir)
nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); set_nlink(inode, nlink); } else { inode = new_inode(dentry->d_sb);
@@ -685,7 +702,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, ovl_set_flag(OVL_IMPURE, inode);
/* Check for non-merge dir that may have whiteouts */
if (S_ISDIR(realinode->i_mode)) {
if (is_dir) { struct ovl_entry *oe = dentry->d_fsdata; if (((upperdentry && lowerdentry) || oe->numlower > 1) ||
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 76440feb79f6..1a436fa92a04 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -211,6 +211,7 @@ static void ovl_destroy_inode(struct inode *inode) struct ovl_inode *oi = OVL_I(inode);
dput(oi->__upperdentry);
iput(oi->lower); kfree(oi->redirect); ovl_dir_cache_free(inode); mutex_destroy(&oi->lock);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index d6bb1c9f5e7a..06119f34a69d 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -257,7 +257,7 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry, if (upperdentry) OVL_I(inode)->__upperdentry = upperdentry; if (lowerdentry)
OVL_I(inode)->lower = d_inode(lowerdentry);
OVL_I(inode)->lower = igrab(d_inode(lowerdentry)); ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
} @@ -273,7 +273,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry) */ smp_wmb(); OVL_I(inode)->__upperdentry = upperdentry;
if (!S_ISDIR(upperinode->i_mode) && inode_unhashed(inode)) {
if (inode_unhashed(inode)) { inode->i_private = upperinode; __insert_inode_hash(inode, (unsigned long) upperinode); }
On Thu, Feb 15, 2018 at 05:02:32PM +0200, Amir Goldstein wrote:
On Thu, Feb 15, 2018 at 4:31 PM, gregkh@linuxfoundation.org wrote:
The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
Hi Niklas,
The conflict resolution of this patch for v4.14 is trivial (just dropping last hunk of inode.c patch), but I no means to test this right now.
Are you able and/or interested to test the attached backport path for stable kernel v4.14?
I have posted an LTP test (inotify07) to test this fix if anyone else is interested in testing: https://github.com/linux-test-project/ltp/pull/246
Thanks, Amir.
Hello Amir,
I have already backported+verified the fix to v4.13 in one of our internal trees.
Like you said, I simply dropped the last hunk of the patch that modfied inode.c, i.e. I simply ignored this hunk of the patch:
@@ -685,7 +702,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, ovl_set_flag(OVL_IMPURE, inode);
/* Check for non-merge dir that may have whiteouts */ - if (S_ISDIR(realinode->i_mode)) { + if (is_dir) { struct ovl_entry *oe = dentry->d_fsdata;
if (((upperdentry && lowerdentry) || oe->numlower > 1) ||
Regards, Niklas
On Thu, Feb 15, 2018 at 04:44:31PM +0100, Niklas Cassel wrote:
On Thu, Feb 15, 2018 at 05:02:32PM +0200, Amir Goldstein wrote:
On Thu, Feb 15, 2018 at 4:31 PM, gregkh@linuxfoundation.org wrote:
The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
Hi Niklas,
The conflict resolution of this patch for v4.14 is trivial (just dropping last hunk of inode.c patch), but I no means to test this right now.
Are you able and/or interested to test the attached backport path for stable kernel v4.14?
I have posted an LTP test (inotify07) to test this fix if anyone else is interested in testing: https://github.com/linux-test-project/ltp/pull/246
Thanks, Amir.
Hello Amir,
I have already backported+verified the fix to v4.13 in one of our internal trees.
Like you said, I simply dropped the last hunk of the patch that modfied inode.c, i.e. I simply ignored this hunk of the patch:
@@ -685,7 +702,7 @@ struct inode *ovl_get_inode(struct dentry *dentry, struct dentry *upperdentry, ovl_set_flag(OVL_IMPURE, inode); /* Check for non-merge dir that may have whiteouts */
- if (S_ISDIR(realinode->i_mode)) {
- if (is_dir) { struct ovl_entry *oe = dentry->d_fsdata;
if (((upperdentry && lowerdentry) || oe->numlower > 1) ||
Ok, last hunk dropped, thanks.
greg k-h
linux-stable-mirror@lists.linaro.org