From: Xiubo Li xiubli@redhat.com
The request's r_session maybe changed when it was forwarded or resent.
Cc: stable@vger.kernel.org URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 Signed-off-by: Xiubo Li xiubli@redhat.com --- fs/ceph/caps.c | 88 +++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 55 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 894adfb4a092..172f18f7459d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2297,8 +2297,9 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; + struct ceph_mds_session *s, **sessions = NULL; unsigned int max_sessions; - int ret, err = 0; + int i, ret, err = 0;
spin_lock(&ci->i_unsafe_lock); if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { @@ -2315,31 +2316,22 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock);
- /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions array memory to get rid of the mdsc->mutex - * lock. - */ -retry: - max_sessions = mdsc->max_sessions; - /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */ + mutex_lock(&mdsc->mutex); + max_sessions = mdsc->max_sessions; + sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); + if (!sessions) { + mutex_unlock(&mdsc->mutex); + err = -ENOMEM; + goto out; + } + if ((req1 || req2) && likely(max_sessions)) { - struct ceph_mds_session **sessions = NULL; - struct ceph_mds_session *s; struct ceph_mds_request *req; - int i; - - sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); - if (!sessions) { - err = -ENOMEM; - goto out; - }
spin_lock(&ci->i_unsafe_lock); if (req1) { @@ -2348,16 +2340,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } + if (unlikely(s->s_mds >= max_sessions)) + continue; if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2370,16 +2354,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } + if (unlikely(s->s_mds >= max_sessions)) + continue; if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2387,25 +2363,26 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } } spin_unlock(&ci->i_unsafe_lock); + } + mutex_unlock(&mdsc->mutex);
- /* the auth MDS */ - spin_lock(&ci->i_ceph_lock); - if (ci->i_auth_cap) { - s = ci->i_auth_cap->session; - if (!sessions[s->s_mds]) - sessions[s->s_mds] = ceph_get_mds_session(s); - } - spin_unlock(&ci->i_ceph_lock); + /* the auth MDS */ + spin_lock(&ci->i_ceph_lock); + if (ci->i_auth_cap) { + s = ci->i_auth_cap->session; + if (!sessions[s->s_mds] && + likely(s->s_mds < max_sessions)) + sessions[s->s_mds] = ceph_get_mds_session(s); + } + spin_unlock(&ci->i_ceph_lock);
- /* send flush mdlog request to MDSes */ - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) { - send_flush_mdlog(s); - ceph_put_mds_session(s); - } + /* send flush mdlog request to MDSes */ + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) { + send_flush_mdlog(s); + ceph_put_mds_session(s); } - kfree(sessions); }
dout("%s %p wait on tid %llu %llu\n", __func__, @@ -2428,6 +2405,7 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) ceph_mdsc_put_request(req1); if (req2) ceph_mdsc_put_request(req2); + kfree(sessions); return err; }
On Tue, Nov 8, 2022 at 6:50 AM xiubli@redhat.com wrote:
From: Xiubo Li xiubli@redhat.com
The request's r_session maybe changed when it was forwarded or resent.
Cc: stable@vger.kernel.org URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 Signed-off-by: Xiubo Li xiubli@redhat.com
fs/ceph/caps.c | 88 +++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 55 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 894adfb4a092..172f18f7459d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2297,8 +2297,9 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL;
struct ceph_mds_session *s, **sessions = NULL;
Hi Xiubo,
Nit: mixing pointers and double pointers coupled with differing initialization is generally frowned upon. Keep it on two lines as before:
struct ceph_mds_session **sessions = NULL; struct ceph_mds_session *s;
unsigned int max_sessions;
int ret, err = 0;
int i, ret, err = 0; spin_lock(&ci->i_unsafe_lock); if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) {
@@ -2315,31 +2316,22 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock);
/*
* The mdsc->max_sessions is unlikely to be changed
* mostly, here we will retry it by reallocating the
* sessions array memory to get rid of the mdsc->mutex
* lock.
*/
-retry:
max_sessions = mdsc->max_sessions;
/* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */
mutex_lock(&mdsc->mutex);
max_sessions = mdsc->max_sessions;
sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
mutex_unlock(&mdsc->mutex);
err = -ENOMEM;
goto out;
}
if ((req1 || req2) && likely(max_sessions)) {
Just curious, when can max_sessions be zero here?
struct ceph_mds_session **sessions = NULL;
struct ceph_mds_session *s; struct ceph_mds_request *req;
int i;
sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
err = -ENOMEM;
goto out;
} spin_lock(&ci->i_unsafe_lock); if (req1) {
@@ -2348,16 +2340,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue;
if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s)
ceph_put_mds_session(s);
}
kfree(sessions);
goto retry;
}
if (unlikely(s->s_mds >= max_sessions))
continue;
Nit: this could be combined with the previous condition:
if (!s || unlikely(s->s_mds >= max_sessions)) continue;
if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s;
@@ -2370,16 +2354,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue;
if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s)
ceph_put_mds_session(s);
}
kfree(sessions);
goto retry;
}
if (unlikely(s->s_mds >= max_sessions))
continue;
ditto
if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s;
@@ -2387,25 +2363,26 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } } spin_unlock(&ci->i_unsafe_lock);
}
mutex_unlock(&mdsc->mutex);
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
if (ci->i_auth_cap) {
s = ci->i_auth_cap->session;
if (!sessions[s->s_mds])
sessions[s->s_mds] = ceph_get_mds_session(s);
}
spin_unlock(&ci->i_ceph_lock);
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
Why was this "auth MDS" block moved outside of max_sessions > 0 branch? Logically, it very much belongs there. Is there a problem with taking ci->i_ceph_lock under mdsc->mutex?
if (ci->i_auth_cap) {
s = ci->i_auth_cap->session;
if (!sessions[s->s_mds] &&
likely(s->s_mds < max_sessions))
This is wrong: s->s_mds must be checked against max_sessions before indexing into sessions array. Also, the entire condition should fit on a single line.
sessions[s->s_mds] = ceph_get_mds_session(s);
}
spin_unlock(&ci->i_ceph_lock);
/* send flush mdlog request to MDSes */
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s) {
send_flush_mdlog(s);
ceph_put_mds_session(s);
}
/* send flush mdlog request to MDSes */
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s) {
send_flush_mdlog(s);
ceph_put_mds_session(s); }
kfree(sessions); } dout("%s %p wait on tid %llu %llu\n", __func__,
@@ -2428,6 +2405,7 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) ceph_mdsc_put_request(req1); if (req2) ceph_mdsc_put_request(req2);
kfree(sessions);
Nit: since sessions array is allocated after references to req1 and req2 are grabbed, I would free it before these references are put.
Thanks,
Ilya
On 08/11/2022 18:50, Ilya Dryomov wrote:
On Tue, Nov 8, 2022 at 6:50 AM xiubli@redhat.com wrote:
From: Xiubo Li xiubli@redhat.com
The request's r_session maybe changed when it was forwarded or resent.
Cc: stable@vger.kernel.org URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 Signed-off-by: Xiubo Li xiubli@redhat.com
fs/ceph/caps.c | 88 +++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 55 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 894adfb4a092..172f18f7459d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2297,8 +2297,9 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL;
struct ceph_mds_session *s, **sessions = NULL;
Hi Xiubo,
Nit: mixing pointers and double pointers coupled with differing initialization is generally frowned upon. Keep it on two lines as before:
struct ceph_mds_session **sessions = NULL; struct ceph_mds_session *s;
Sure, will fix it.
unsigned int max_sessions;
int ret, err = 0;
int i, ret, err = 0; spin_lock(&ci->i_unsafe_lock); if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) {
@@ -2315,31 +2316,22 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock);
/*
* The mdsc->max_sessions is unlikely to be changed
* mostly, here we will retry it by reallocating the
* sessions array memory to get rid of the mdsc->mutex
* lock.
*/
-retry:
max_sessions = mdsc->max_sessions;
/* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */
mutex_lock(&mdsc->mutex);
max_sessions = mdsc->max_sessions;
sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
mutex_unlock(&mdsc->mutex);
err = -ENOMEM;
goto out;
}
if ((req1 || req2) && likely(max_sessions)) {
Just curious, when can max_sessions be zero here?
Checked the code again, just before registering the first session, and this is monotone increasing. It should be safe to remove this here.
struct ceph_mds_session **sessions = NULL;
struct ceph_mds_session *s; struct ceph_mds_request *req;
int i;
sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
err = -ENOMEM;
goto out;
} spin_lock(&ci->i_unsafe_lock); if (req1) {
@@ -2348,16 +2340,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue;
if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s)
ceph_put_mds_session(s);
}
kfree(sessions);
goto retry;
}
if (unlikely(s->s_mds >= max_sessions))
continue;
Nit: this could be combined with the previous condition:
if (!s || unlikely(s->s_mds >= max_sessions)) continue;
Sure.
if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s;
@@ -2370,16 +2354,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue;
if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s)
ceph_put_mds_session(s);
}
kfree(sessions);
goto retry;
}
if (unlikely(s->s_mds >= max_sessions))
continue;
ditto
if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s;
@@ -2387,25 +2363,26 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } } spin_unlock(&ci->i_unsafe_lock);
}
mutex_unlock(&mdsc->mutex);
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
if (ci->i_auth_cap) {
s = ci->i_auth_cap->session;
if (!sessions[s->s_mds])
sessions[s->s_mds] = ceph_get_mds_session(s);
}
spin_unlock(&ci->i_ceph_lock);
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
Why was this "auth MDS" block moved outside of max_sessions > 0 branch? Logically, it very much belongs there. Is there a problem with taking ci->i_ceph_lock under mdsc->mutex?
I will remove the 'likely(max_session)' and there is no any problem for this.
if (ci->i_auth_cap) {
s = ci->i_auth_cap->session;
if (!sessions[s->s_mds] &&
likely(s->s_mds < max_sessions))
This is wrong: s->s_mds must be checked against max_sessions before indexing into sessions array. Also, the entire condition should fit on a single line.
I am moving it to the if(req1 || req2) {} scope and it will exceed 80 chars. And will keep it in two lines.
sessions[s->s_mds] = ceph_get_mds_session(s);
}
spin_unlock(&ci->i_ceph_lock);
/* send flush mdlog request to MDSes */
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s) {
send_flush_mdlog(s);
ceph_put_mds_session(s);
}
/* send flush mdlog request to MDSes */
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s) {
send_flush_mdlog(s);
ceph_put_mds_session(s); }
kfree(sessions); } dout("%s %p wait on tid %llu %llu\n", __func__,
@@ -2428,6 +2405,7 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) ceph_mdsc_put_request(req1); if (req2) ceph_mdsc_put_request(req2);
kfree(sessions);
Nit: since sessions array is allocated after references to req1 and req2 are grabbed, I would free it before these references are put.
Sure!
Thanks!
- Xiubo
Thanks,
Ilya
On Tue, Nov 8, 2022 at 1:38 PM Xiubo Li xiubli@redhat.com wrote:
On 08/11/2022 18:50, Ilya Dryomov wrote:
On Tue, Nov 8, 2022 at 6:50 AM xiubli@redhat.com wrote:
From: Xiubo Li xiubli@redhat.com
The request's r_session maybe changed when it was forwarded or resent.
Cc: stable@vger.kernel.org URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 Signed-off-by: Xiubo Li xiubli@redhat.com
fs/ceph/caps.c | 88 +++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 55 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 894adfb4a092..172f18f7459d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2297,8 +2297,9 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL;
struct ceph_mds_session *s, **sessions = NULL;
Hi Xiubo,
Nit: mixing pointers and double pointers coupled with differing initialization is generally frowned upon. Keep it on two lines as before:
struct ceph_mds_session **sessions = NULL; struct ceph_mds_session *s;
Sure, will fix it.
unsigned int max_sessions;
int ret, err = 0;
int i, ret, err = 0; spin_lock(&ci->i_unsafe_lock); if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) {
@@ -2315,31 +2316,22 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock);
/*
* The mdsc->max_sessions is unlikely to be changed
* mostly, here we will retry it by reallocating the
* sessions array memory to get rid of the mdsc->mutex
* lock.
*/
-retry:
max_sessions = mdsc->max_sessions;
/* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */
mutex_lock(&mdsc->mutex);
max_sessions = mdsc->max_sessions;
sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
mutex_unlock(&mdsc->mutex);
err = -ENOMEM;
goto out;
}
if ((req1 || req2) && likely(max_sessions)) {
Just curious, when can max_sessions be zero here?
Checked the code again, just before registering the first session, and this is monotone increasing. It should be safe to remove this here.
struct ceph_mds_session **sessions = NULL;
struct ceph_mds_session *s; struct ceph_mds_request *req;
int i;
sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
err = -ENOMEM;
goto out;
} spin_lock(&ci->i_unsafe_lock); if (req1) {
@@ -2348,16 +2340,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue;
if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s)
ceph_put_mds_session(s);
}
kfree(sessions);
goto retry;
}
if (unlikely(s->s_mds >= max_sessions))
continue;
Nit: this could be combined with the previous condition:
if (!s || unlikely(s->s_mds >= max_sessions)) continue;
Sure.
if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s;
@@ -2370,16 +2354,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue;
if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s)
ceph_put_mds_session(s);
}
kfree(sessions);
goto retry;
}
if (unlikely(s->s_mds >= max_sessions))
continue;
ditto
if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s;
@@ -2387,25 +2363,26 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } } spin_unlock(&ci->i_unsafe_lock);
}
mutex_unlock(&mdsc->mutex);
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
if (ci->i_auth_cap) {
s = ci->i_auth_cap->session;
if (!sessions[s->s_mds])
sessions[s->s_mds] = ceph_get_mds_session(s);
}
spin_unlock(&ci->i_ceph_lock);
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
Why was this "auth MDS" block moved outside of max_sessions > 0 branch? Logically, it very much belongs there. Is there a problem with taking ci->i_ceph_lock under mdsc->mutex?
I will remove the 'likely(max_session)' and there is no any problem for this.
if (ci->i_auth_cap) {
s = ci->i_auth_cap->session;
if (!sessions[s->s_mds] &&
likely(s->s_mds < max_sessions))
This is wrong: s->s_mds must be checked against max_sessions before indexing into sessions array. Also, the entire condition should fit on a single line.
I am moving it to the if(req1 || req2) {} scope and it will exceed 80 chars. And will keep it in two lines.
If you are removing max_session > 0 condition, I don't see a need to move this to "if (req1 || req2)" scope. I suggested that only because existing code was explicitly guarding against max_session == 0.
Thanks,
Ilya
On 08/11/2022 20:44, Ilya Dryomov wrote:
On Tue, Nov 8, 2022 at 1:38 PM Xiubo Li xiubli@redhat.com wrote:
On 08/11/2022 18:50, Ilya Dryomov wrote:
On Tue, Nov 8, 2022 at 6:50 AM xiubli@redhat.com wrote:
From: Xiubo Li xiubli@redhat.com
The request's r_session maybe changed when it was forwarded or resent.
Cc: stable@vger.kernel.org URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 Signed-off-by: Xiubo Li xiubli@redhat.com
fs/ceph/caps.c | 88 +++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 55 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 894adfb4a092..172f18f7459d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2297,8 +2297,9 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL;
struct ceph_mds_session *s, **sessions = NULL;
Hi Xiubo,
Nit: mixing pointers and double pointers coupled with differing initialization is generally frowned upon. Keep it on two lines as before:
struct ceph_mds_session **sessions = NULL; struct ceph_mds_session *s;
Sure, will fix it.
unsigned int max_sessions;
int ret, err = 0;
int i, ret, err = 0; spin_lock(&ci->i_unsafe_lock); if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) {
@@ -2315,31 +2316,22 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock);
/*
* The mdsc->max_sessions is unlikely to be changed
* mostly, here we will retry it by reallocating the
* sessions array memory to get rid of the mdsc->mutex
* lock.
*/
-retry:
max_sessions = mdsc->max_sessions;
/* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */
mutex_lock(&mdsc->mutex);
max_sessions = mdsc->max_sessions;
sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
mutex_unlock(&mdsc->mutex);
err = -ENOMEM;
goto out;
}
if ((req1 || req2) && likely(max_sessions)) {
Just curious, when can max_sessions be zero here?
Checked the code again, just before registering the first session, and this is monotone increasing. It should be safe to remove this here.
struct ceph_mds_session **sessions = NULL;
struct ceph_mds_session *s; struct ceph_mds_request *req;
int i;
sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL);
if (!sessions) {
err = -ENOMEM;
goto out;
} spin_lock(&ci->i_unsafe_lock); if (req1) {
@@ -2348,16 +2340,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue;
if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s)
ceph_put_mds_session(s);
}
kfree(sessions);
goto retry;
}
if (unlikely(s->s_mds >= max_sessions))
continue;
Nit: this could be combined with the previous condition:
if (!s || unlikely(s->s_mds >= max_sessions)) continue;
Sure.
if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s;
@@ -2370,16 +2354,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue;
if (unlikely(s->s_mds >= max_sessions)) {
spin_unlock(&ci->i_unsafe_lock);
for (i = 0; i < max_sessions; i++) {
s = sessions[i];
if (s)
ceph_put_mds_session(s);
}
kfree(sessions);
goto retry;
}
if (unlikely(s->s_mds >= max_sessions))
continue;
ditto
if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s;
@@ -2387,25 +2363,26 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } } spin_unlock(&ci->i_unsafe_lock);
}
mutex_unlock(&mdsc->mutex);
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
if (ci->i_auth_cap) {
s = ci->i_auth_cap->session;
if (!sessions[s->s_mds])
sessions[s->s_mds] = ceph_get_mds_session(s);
}
spin_unlock(&ci->i_ceph_lock);
/* the auth MDS */
spin_lock(&ci->i_ceph_lock);
Why was this "auth MDS" block moved outside of max_sessions > 0 branch? Logically, it very much belongs there. Is there a problem with taking ci->i_ceph_lock under mdsc->mutex?
I will remove the 'likely(max_session)' and there is no any problem for this.
if (ci->i_auth_cap) {
s = ci->i_auth_cap->session;
if (!sessions[s->s_mds] &&
likely(s->s_mds < max_sessions))
This is wrong: s->s_mds must be checked against max_sessions before indexing into sessions array. Also, the entire condition should fit on a single line.
I am moving it to the if(req1 || req2) {} scope and it will exceed 80 chars. And will keep it in two lines.
If you are removing max_session > 0 condition, I don't see a need to move this to "if (req1 || req2)" scope. I suggested that only because existing code was explicitly guarding against max_session == 0.
I checked old code again, I think we should move it to this scope. Because only when both req1 and req2 are not NULL will it make sense to send the mdlog flushing request to MDS.
Thanks!
- Xiubo
Thanks,
Ilya
linux-stable-mirror@lists.linaro.org