The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d42334578eba1390859012ebb91e1e556d51db49
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080735-headsman-grandpa-be68@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
d42334578eba ("exfat: check if filename entries exceeds max filename length")
20914ff6dd56 ("exfat: move exfat_entry_set_cache from heap to stack")
a3ff29a95fde ("exfat: support dynamic allocate bh for exfat_entry_set_cache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d42334578eba1390859012ebb91e1e556d51db49 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <linkinjeon(a)kernel.org>
Date: Thu, 13 Jul 2023 21:59:37 +0900
Subject: [PATCH] exfat: check if filename entries exceeds max filename length
exfat_extract_uni_name copies characters from a given file name entry into
the 'uniname' variable. This variable is actually defined on the stack of
the exfat_readdir() function. According to the definition of
the 'exfat_uni_name' type, the file name should be limited 255 characters
(+ null teminator space), but the exfat_get_uniname_from_ext_entry()
function can write more characters because there is no check if filename
entries exceeds max filename length. This patch add the check not to copy
filename characters when exceeding max filename length.
Cc: stable(a)vger.kernel.org
Cc: Yuezhang Mo <Yuezhang.Mo(a)sony.com>
Reported-by: Maxim Suhanov <dfirblog(a)gmail.com>
Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon(a)kernel.org>
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 957574180a5e..bc48f3329921 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -34,6 +34,7 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
{
int i, err;
struct exfat_entry_set_cache es;
+ unsigned int uni_len = 0, len;
err = exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES);
if (err)
@@ -52,7 +53,10 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
if (exfat_get_entry_type(ep) != TYPE_EXTEND)
break;
- exfat_extract_uni_name(ep, uniname);
+ len = exfat_extract_uni_name(ep, uniname);
+ uni_len += len;
+ if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH)
+ break;
uniname += EXFAT_FILE_NAME_LEN;
}
@@ -1079,7 +1083,8 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
if (entry_type == TYPE_EXTEND) {
unsigned short entry_uniname[16], unichar;
- if (step != DIRENT_STEP_NAME) {
+ if (step != DIRENT_STEP_NAME ||
+ name_len >= MAX_NAME_LENGTH) {
step = DIRENT_STEP_FILE;
continue;
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x d42334578eba1390859012ebb91e1e556d51db49
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080734-remarry-tamer-aabe@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
d42334578eba ("exfat: check if filename entries exceeds max filename length")
20914ff6dd56 ("exfat: move exfat_entry_set_cache from heap to stack")
a3ff29a95fde ("exfat: support dynamic allocate bh for exfat_entry_set_cache")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d42334578eba1390859012ebb91e1e556d51db49 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <linkinjeon(a)kernel.org>
Date: Thu, 13 Jul 2023 21:59:37 +0900
Subject: [PATCH] exfat: check if filename entries exceeds max filename length
exfat_extract_uni_name copies characters from a given file name entry into
the 'uniname' variable. This variable is actually defined on the stack of
the exfat_readdir() function. According to the definition of
the 'exfat_uni_name' type, the file name should be limited 255 characters
(+ null teminator space), but the exfat_get_uniname_from_ext_entry()
function can write more characters because there is no check if filename
entries exceeds max filename length. This patch add the check not to copy
filename characters when exceeding max filename length.
Cc: stable(a)vger.kernel.org
Cc: Yuezhang Mo <Yuezhang.Mo(a)sony.com>
Reported-by: Maxim Suhanov <dfirblog(a)gmail.com>
Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon(a)kernel.org>
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 957574180a5e..bc48f3329921 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -34,6 +34,7 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
{
int i, err;
struct exfat_entry_set_cache es;
+ unsigned int uni_len = 0, len;
err = exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES);
if (err)
@@ -52,7 +53,10 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
if (exfat_get_entry_type(ep) != TYPE_EXTEND)
break;
- exfat_extract_uni_name(ep, uniname);
+ len = exfat_extract_uni_name(ep, uniname);
+ uni_len += len;
+ if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH)
+ break;
uniname += EXFAT_FILE_NAME_LEN;
}
@@ -1079,7 +1083,8 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
if (entry_type == TYPE_EXTEND) {
unsigned short entry_uniname[16], unichar;
- if (step != DIRENT_STEP_NAME) {
+ if (step != DIRENT_STEP_NAME ||
+ name_len >= MAX_NAME_LENGTH) {
step = DIRENT_STEP_FILE;
continue;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x e7e607bd00481745550389a29ecabe33e13d67cf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080735-sprang-moonlit-32f2@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
e7e607bd0048 ("ceph: defer stopping mdsc delayed_work")
470a5c77eac0 ("ceph: use kill_anon_super helper")
fa9967734227 ("ceph: fix potential mdsc use-after-free crash")
3a3430affce5 ("ceph: show tasks waiting on caps in debugfs caps file")
7b2f936fc828 ("ceph: fix error handling in ceph_get_caps()")
1199d7da2d29 ("ceph: simplify arguments and return semantics of try_get_cap_refs")
ff4a80bf2d3f ("ceph: dump granular cap info in "caps" debugfs file")
0c44a8e0fc55 ("ceph: quota: fix quota subdir mounts")
2ee9dd958d47 ("ceph: add non-blocking parameter to ceph_try_get_caps()")
a57d9064e4ee ("ceph: flush pending works before shutdown super")
9122eed5281e ("ceph: quota: report root dir quota usage in statfs")
d557c48db730 ("ceph: quota: add counter for snaprealms with quota")
e3161f17d926 ("ceph: quota: cache inode pointer in ceph_snap_realm")
0eb6bbe4d9cf ("ceph: fix root quota realm check")
2596366907f8 ("ceph: don't check quota for snap inode")
1ab302a0cb14 ("ceph: quota: update MDS when max_bytes is approaching")
2b83845f8bd7 ("ceph: quota: support for ceph.quota.max_bytes")
cafe21a4fb30 ("ceph: quota: don't allow cross-quota renames")
b7a2921765cf ("ceph: quota: support for ceph.quota.max_files")
fb18a57568c2 ("ceph: quota: add initial infrastructure to support cephfs quotas")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e7e607bd00481745550389a29ecabe33e13d67cf Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli(a)redhat.com>
Date: Tue, 25 Jul 2023 12:03:59 +0800
Subject: [PATCH] ceph: defer stopping mdsc delayed_work
Flushing the dirty buffer may take a long time if the cluster is
overloaded or if there is network issue. So we should ping the
MDSs periodically to keep alive, else the MDS will blocklist
the kclient.
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/61843
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
Reviewed-by: Milind Changire <mchangir(a)redhat.com>
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 66048a86c480..5fb367b1d4b0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
- if (mdsc->stopping)
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
return;
mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
{
dout("pre_umount\n");
- mdsc->stopping = 1;
+ mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 724307ff89cd..86d2965e68a1 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -380,6 +380,11 @@ struct cap_wait {
int want;
};
+enum {
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
/*
* mds client state
*/
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3fc48b43cab0..a5f52013314d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
ceph_mdsc_pre_umount(fsc->mdsc);
flush_fs_workqueues(fsc);
+ /*
+ * Though the kill_anon_super() will finally trigger the
+ * sync_filesystem() anyway, we still need to do it here
+ * and then bump the stage of shutdown to stop the work
+ * queue as earlier as possible.
+ */
+ sync_filesystem(s);
+
+ fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
kill_anon_super(s);
fsc->client->extra_mon_dispatch = NULL;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x e7e607bd00481745550389a29ecabe33e13d67cf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080734-unenvied-relocate-bccd@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
e7e607bd0048 ("ceph: defer stopping mdsc delayed_work")
470a5c77eac0 ("ceph: use kill_anon_super helper")
fa9967734227 ("ceph: fix potential mdsc use-after-free crash")
3a3430affce5 ("ceph: show tasks waiting on caps in debugfs caps file")
7b2f936fc828 ("ceph: fix error handling in ceph_get_caps()")
1199d7da2d29 ("ceph: simplify arguments and return semantics of try_get_cap_refs")
ff4a80bf2d3f ("ceph: dump granular cap info in "caps" debugfs file")
0c44a8e0fc55 ("ceph: quota: fix quota subdir mounts")
2ee9dd958d47 ("ceph: add non-blocking parameter to ceph_try_get_caps()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e7e607bd00481745550389a29ecabe33e13d67cf Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli(a)redhat.com>
Date: Tue, 25 Jul 2023 12:03:59 +0800
Subject: [PATCH] ceph: defer stopping mdsc delayed_work
Flushing the dirty buffer may take a long time if the cluster is
overloaded or if there is network issue. So we should ping the
MDSs periodically to keep alive, else the MDS will blocklist
the kclient.
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/61843
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
Reviewed-by: Milind Changire <mchangir(a)redhat.com>
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 66048a86c480..5fb367b1d4b0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
- if (mdsc->stopping)
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
return;
mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
{
dout("pre_umount\n");
- mdsc->stopping = 1;
+ mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 724307ff89cd..86d2965e68a1 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -380,6 +380,11 @@ struct cap_wait {
int want;
};
+enum {
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
/*
* mds client state
*/
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3fc48b43cab0..a5f52013314d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
ceph_mdsc_pre_umount(fsc->mdsc);
flush_fs_workqueues(fsc);
+ /*
+ * Though the kill_anon_super() will finally trigger the
+ * sync_filesystem() anyway, we still need to do it here
+ * and then bump the stage of shutdown to stop the work
+ * queue as earlier as possible.
+ */
+ sync_filesystem(s);
+
+ fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
kill_anon_super(s);
fsc->client->extra_mon_dispatch = NULL;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x e7e607bd00481745550389a29ecabe33e13d67cf
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080733-snowbound-wagon-56bb@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
e7e607bd0048 ("ceph: defer stopping mdsc delayed_work")
470a5c77eac0 ("ceph: use kill_anon_super helper")
fa9967734227 ("ceph: fix potential mdsc use-after-free crash")
3a3430affce5 ("ceph: show tasks waiting on caps in debugfs caps file")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e7e607bd00481745550389a29ecabe33e13d67cf Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli(a)redhat.com>
Date: Tue, 25 Jul 2023 12:03:59 +0800
Subject: [PATCH] ceph: defer stopping mdsc delayed_work
Flushing the dirty buffer may take a long time if the cluster is
overloaded or if there is network issue. So we should ping the
MDSs periodically to keep alive, else the MDS will blocklist
the kclient.
Cc: stable(a)vger.kernel.org
Link: https://tracker.ceph.com/issues/61843
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
Reviewed-by: Milind Changire <mchangir(a)redhat.com>
Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com>
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 66048a86c480..5fb367b1d4b0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
- if (mdsc->stopping)
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
return;
mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
{
dout("pre_umount\n");
- mdsc->stopping = 1;
+ mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 724307ff89cd..86d2965e68a1 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -380,6 +380,11 @@ struct cap_wait {
int want;
};
+enum {
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
/*
* mds client state
*/
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3fc48b43cab0..a5f52013314d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
ceph_mdsc_pre_umount(fsc->mdsc);
flush_fs_workqueues(fsc);
+ /*
+ * Though the kill_anon_super() will finally trigger the
+ * sync_filesystem() anyway, we still need to do it here
+ * and then bump the stage of shutdown to stop the work
+ * queue as earlier as possible.
+ */
+ sync_filesystem(s);
+
+ fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
kill_anon_super(s);
fsc->client->extra_mon_dispatch = NULL;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x e65851989001c0c9ba9177564b13b38201c0854c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080717-repair-pessimism-cb11@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
e65851989001 ("scsi: zfcp: Defer fc_rport blocking until after ADISC response")
8c9db6679be4 ("scsi: zfcp: Fix failed recovery on gone remote port with non-NPIV FCP devices")
5c750d58e9d7 ("scsi: zfcp: workqueue: set description for port work items with their WWPN as context")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e65851989001c0c9ba9177564b13b38201c0854c Mon Sep 17 00:00:00 2001
From: Steffen Maier <maier(a)linux.ibm.com>
Date: Mon, 24 Jul 2023 16:51:56 +0200
Subject: [PATCH] scsi: zfcp: Defer fc_rport blocking until after ADISC
response
Storage devices are free to send RSCNs, e.g. for internal state changes. If
this happens on all connected paths, zfcp risks temporarily losing all
paths at the same time. This has strong requirements on multipath
configuration such as "no_path_retry queue".
Avoid such situations by deferring fc_rport blocking until after the ADISC
response, when any actual state change of the remote port became clear.
The already existing port recovery triggers explicitly block the fc_rport.
The triggers are: on ADISC reject or timeout (typical cable pull case), and
on ADISC indicating that the remote port has changed its WWPN or
the port is meanwhile no longer open.
As a side effect, this also removes a confusing direct function call to
another work item function zfcp_scsi_rport_work() instead of scheduling
that other work item. It was probably done that way to have the rport block
side effect immediate and synchronous to the caller.
Fixes: a2fa0aede07c ("[SCSI] zfcp: Block FC transport rports early on errors")
Cc: stable(a)vger.kernel.org #v2.6.30+
Reviewed-by: Benjamin Block <bblock(a)linux.ibm.com>
Reviewed-by: Fedor Loshakov <loshakov(a)linux.ibm.com>
Signed-off-by: Steffen Maier <maier(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20230724145156.3920244-1-maier@linux.ibm.com
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index f21307537829..4f0d0e55f0d4 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -534,8 +534,7 @@ static void zfcp_fc_adisc_handler(void *data)
/* re-init to undo drop from zfcp_fc_adisc() */
port->d_id = ntoh24(adisc_resp->adisc_port_id);
- /* port is good, unblock rport without going through erp */
- zfcp_scsi_schedule_rport_register(port);
+ /* port is still good, nothing to do */
out:
atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
put_device(&port->dev);
@@ -595,9 +594,6 @@ void zfcp_fc_link_test_work(struct work_struct *work)
int retval;
set_worker_desc("zadisc%16llx", port->wwpn); /* < WORKER_DESC_LEN=24 */
- get_device(&port->dev);
- port->rport_task = RPORT_DEL;
- zfcp_scsi_rport_work(&port->rport_work);
/* only issue one test command at one time per port */
if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST)
> > thing to do is to just special-case S_ISDIR. Not lovely, but whatever.
> >
> > So something like this instead? It's a smaller diff anyway, and it
> > gets the crazy afds/ceph cases right too.
>
> If you really care about this we can do it. But if we can live with just
I see you went with the S_ISDIR thing for now. How do you feel about
adding something like the appended patch (untested) on top of this?
So instead of relying on the inode we could just check f_ops for
iterate/iterate_shared. That should amount to the same thing(*) but
looks cleaner to me. Alternatively we can do the flag thing you
mentioned ofc.
(*) I suffered from a proper cold so my brain is in a half-working state.
There were a few places that were incorrectly testing for whether an
open(2) operation was O_TMPFILE by doing (flags & O_TMPFILE). As
O_TMPFILE is defined as __O_TMPFILE|O_DIRECTORY, this would cause the
code to assume that O_DIRECTORY is equivalent to O_TMPFILE.
The only places where this happened were in RESOLVE_CACHED and
io_uring's checking related to RESOLVE_CACHED, so the only bug this
really fixes is that now O_DIRECTORY will no longer cause RESOLVE_CACHED
to always fail with -EAGAIN (and io_uring will thus be faster when doing
O_DIRECTORY opens).
Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com>
---
Changes in v2:
- fix io_uring's io_openat_force_async as well.
- v1: <https://lore.kernel.org/r/20230806-resolve_cached-o_tmpfile-v1-1-7ba1630846…>
---
Aleksa Sarai (2):
open: make RESOLVE_CACHED correctly test for O_TMPFILE
io_uring: correct check for O_TMPFILE
fs/open.c | 2 +-
io_uring/openclose.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
---
base-commit: bf5ad7af0516cb47121dae1b1c160e4385615274
change-id: 20230806-resolve_cached-o_tmpfile-978cb238bd68
Best regards,
--
Aleksa Sarai <cyphar(a)cyphar.com>
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x b8ada54fa1b83f3b6480d4cced71354301750153
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023062303-crazily-recent-78b0@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b8ada54fa1b83f3b6480d4cced71354301750153 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov(a)omp.ru>
Date: Sat, 17 Jun 2023 23:36:12 +0300
Subject: [PATCH] mmc: meson-gx: fix deferred probing
The driver overrides the error codes and IRQ0 returned by platform_get_irq()
to -EINVAL, so if it returns -EPROBE_DEFER, the driver will fail the probe
permanently instead of the deferred probing. Switch to propagating the error
codes upstream. Since commit ce753ad1549c ("platform: finally disallow IRQ0
in platform_get_irq() and its ilk") IRQ0 is no longer returned by those APIs,
so we now can safely ignore it...
Fixes: cbcaac6d7dd2 ("mmc: meson-gx-mmc: Fix platform_get_irq's error checking")
Cc: stable(a)vger.kernel.org # v5.19+
Signed-off-by: Sergey Shtylyov <s.shtylyov(a)omp.ru>
Reviewed-by: Neil Armstrong <neil.armstrong(a)linaro.org>
Link: https://lore.kernel.org/r/20230617203622.6812-3-s.shtylyov@omp.ru
Signed-off-by: Ulf Hansson <ulf.hansson(a)linaro.org>
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index f90b0fd8d8b0..ee9a25b900ae 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -1186,8 +1186,8 @@ static int meson_mmc_probe(struct platform_device *pdev)
return PTR_ERR(host->regs);
host->irq = platform_get_irq(pdev, 0);
- if (host->irq <= 0)
- return -EINVAL;
+ if (host->irq < 0)
+ return host->irq;
cd_irq = platform_get_irq_optional(pdev, 1);
mmc_gpio_set_cd_irq(mmc, cd_irq);
The patch titled
Subject: nilfs2: fix general protection fault in nilfs_lookup_dirty_data_buffers()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
nilfs2-fix-general-protection-fault-in-nilfs_lookup_dirty_data_buffers.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix general protection fault in nilfs_lookup_dirty_data_buffers()
Date: Sat, 5 Aug 2023 22:20:38 +0900
A syzbot stress test reported that create_empty_buffers() called from
nilfs_lookup_dirty_data_buffers() can cause a general protection fault.
Analysis using its reproducer revealed that the back reference "mapping"
from a page/folio has been changed to NULL after dirty page/folio gang
lookup in nilfs_lookup_dirty_data_buffers().
Fix this issue by excluding pages/folios from being collected if, after
acquiring a lock on each page/folio, its back reference "mapping" differs
from the pointer to the address space struct that held the page/folio.
Link: https://lkml.kernel.org/r/20230805132038.6435-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+0ad741797f4565e7e2d2(a)syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/0000000000002930a705fc32b231@google.com
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/segment.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/fs/nilfs2/segment.c~nilfs2-fix-general-protection-fault-in-nilfs_lookup_dirty_data_buffers
+++ a/fs/nilfs2/segment.c
@@ -725,6 +725,11 @@ static size_t nilfs_lookup_dirty_data_bu
struct folio *folio = fbatch.folios[i];
folio_lock(folio);
+ if (unlikely(folio->mapping != mapping)) {
+ /* Exclude folios removed from the address space */
+ folio_unlock(folio);
+ continue;
+ }
head = folio_buffers(folio);
if (!head) {
create_empty_buffers(&folio->page, i_blocksize(inode), 0);
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-fix-general-protection-fault-in-nilfs_lookup_dirty_data_buffers.patch
The patch titled
Subject: mm/gup: handle cont-PTE hugetlb pages correctly in gup_must_unshare() via GUP-fast
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/gup: handle cont-PTE hugetlb pages correctly in gup_must_unshare() via GUP-fast
Date: Sat, 5 Aug 2023 12:12:56 +0200
In contrast to most other GUP code, GUP-fast common page table walking
code like gup_pte_range() also handles hugetlb pages. But in contrast to
other hugetlb page table walking code, it does not look at the hugetlb PTE
abstraction whereby we have only a single logical hugetlb PTE per hugetlb
page, even when using multiple cont-PTEs underneath -- which is for
example what huge_ptep_get() abstracts.
So when we have a hugetlb page that is mapped via cont-PTEs, GUP-fast
might stumble over a PTE that does not map the head page of a hugetlb page
-- not the first "head" PTE of such a cont mapping.
Logically, the whole hugetlb page is mapped (entire_mapcount == 1), but we
might end up calling gup_must_unshare() with a tail page of a hugetlb
page.
We only maintain a single PageAnonExclusive flag per hugetlb page (as
hugetlb pages cannot get partially COW-shared), stored for the head page.
That flag is clear for all tail pages.
So when gup_must_unshare() ends up calling PageAnonExclusive() with a tail
page of a hugetlb page:
1) With CONFIG_DEBUG_VM_PGFLAGS
Stumbles over the:
VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
For example, when executing the COW selftests with 64k hugetlb pages on
arm64:
[ 61.082187] page:00000000829819ff refcount:3 mapcount:1 mapping:0000000000000000 index:0x1 pfn:0x11ee11
[ 61.082842] head:0000000080f79bf7 order:4 entire_mapcount:1 nr_pages_mapped:0 pincount:2
[ 61.083384] anon flags: 0x17ffff80003000e(referenced|uptodate|dirty|head|mappedtodisk|node=0|zone=2|lastcpupid=0xfffff)
[ 61.084101] page_type: 0xffffffff()
[ 61.084332] raw: 017ffff800000000 fffffc00037b8401 0000000000000402 0000000200000000
[ 61.084840] raw: 0000000000000010 0000000000000000 00000000ffffffff 0000000000000000
[ 61.085359] head: 017ffff80003000e ffffd9e95b09b788 ffffd9e95b09b788 ffff0007ff63cf71
[ 61.085885] head: 0000000000000000 0000000000000002 00000003ffffffff 0000000000000000
[ 61.086415] page dumped because: VM_BUG_ON_PAGE(PageHuge(page) && !PageHead(page))
[ 61.086914] ------------[ cut here ]------------
[ 61.087220] kernel BUG at include/linux/page-flags.h:990!
[ 61.087591] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
[ 61.087999] Modules linked in: ...
[ 61.089404] CPU: 0 PID: 4612 Comm: cow Kdump: loaded Not tainted 6.5.0-rc4+ #3
[ 61.089917] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
[ 61.090409] pstate: 604000c5 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 61.090897] pc : gup_must_unshare.part.0+0x64/0x98
[ 61.091242] lr : gup_must_unshare.part.0+0x64/0x98
[ 61.091592] sp : ffff8000825eb940
[ 61.091826] x29: ffff8000825eb940 x28: 0000000000000000 x27: fffffc00037b8440
[ 61.092329] x26: 0400000000000001 x25: 0000000000080101 x24: 0000000000080000
[ 61.092835] x23: 0000000000080100 x22: ffff0000cffb9588 x21: ffff0000c8ec6b58
[ 61.093341] x20: 0000ffffad6b1000 x19: fffffc00037b8440 x18: ffffffffffffffff
[ 61.093850] x17: 2864616548656761 x16: 5021202626202965 x15: 6761702865677548
[ 61.094358] x14: 6567615028454741 x13: 2929656761702864 x12: 6165486567615021
[ 61.094858] x11: 00000000ffff7fff x10: 00000000ffff7fff x9 : ffffd9e958b7a1c0
[ 61.095359] x8 : 00000000000bffe8 x7 : c0000000ffff7fff x6 : 00000000002bffa8
[ 61.095873] x5 : ffff0008bb19e708 x4 : 0000000000000000 x3 : 0000000000000000
[ 61.096380] x2 : 0000000000000000 x1 : ffff0000cf6636c0 x0 : 0000000000000046
[ 61.096894] Call trace:
[ 61.097080] gup_must_unshare.part.0+0x64/0x98
[ 61.097392] gup_pte_range+0x3a8/0x3f0
[ 61.097662] gup_pgd_range+0x1ec/0x280
[ 61.097942] lockless_pages_from_mm+0x64/0x1a0
[ 61.098258] internal_get_user_pages_fast+0xe4/0x1d0
[ 61.098612] pin_user_pages_fast+0x58/0x78
[ 61.098917] pin_longterm_test_start+0xf4/0x2b8
[ 61.099243] gup_test_ioctl+0x170/0x3b0
[ 61.099528] __arm64_sys_ioctl+0xa8/0xf0
[ 61.099822] invoke_syscall.constprop.0+0x7c/0xd0
[ 61.100160] el0_svc_common.constprop.0+0xe8/0x100
[ 61.100500] do_el0_svc+0x38/0xa0
[ 61.100736] el0_svc+0x3c/0x198
[ 61.100971] el0t_64_sync_handler+0x134/0x150
[ 61.101280] el0t_64_sync+0x17c/0x180
[ 61.101543] Code: aa1303e0 f00074c1 912b0021 97fffeb2 (d4210000)
2) Without CONFIG_DEBUG_VM_PGFLAGS
Always detects "not exclusive" for passed tail pages and refuses to PIN
the tail pages R/O, as gup_must_unshare() == true. GUP-fast will fallback
to ordinary GUP. As ordinary GUP properly considers the logical hugetlb
PTE abstraction in hugetlb_follow_page_mask(), pinning the page will
succeed when looking at the PageAnonExclusive on the head page only.
So the only real effect of this is that with cont-PTE hugetlb pages, we'll
always fallback from GUP-fast to ordinary GUP when not working on the head
page, which ends up checking the head page and do the right thing.
Consequently, the cow selftests pass with cont-PTE hugetlb pages as well
without CONFIG_DEBUG_VM_PGFLAGS.
Note that this only applies to anon hugetlb pages that are mapped using
cont-PTEs: for example 64k hugetlb pages on a 4k arm64 kernel.
... and only when R/O-pinning (FOLL_PIN) such pages that are mapped into
the page table R/O using GUP-fast.
On production kernels (and even most debug kernels, that don't set
CONFIG_DEBUG_VM_PGFLAGS) this patch should theoretically not be required
to be backported. But of course, it does not hurt.
Link: https://lkml.kernel.org/r/20230805101256.87306-1-david@redhat.com
Fixes: a7f226604170 ("mm/gup: trigger FAULT_FLAG_UNSHARE when R/O-pinning a possibly shared anonymous page")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reported-by: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/internal.h | 10 ++++++++++
1 file changed, 10 insertions(+)
--- a/mm/internal.h~mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast
+++ a/mm/internal.h
@@ -1005,6 +1005,16 @@ static inline bool gup_must_unshare(stru
smp_rmb();
/*
+ * During GUP-fast we might not get called on the head page for a
+ * hugetlb page that is mapped using cont-PTE, because GUP-fast does
+ * not work with the abstracted hugetlb PTEs that always point at the
+ * head page. For hugetlb, PageAnonExclusive only applies on the head
+ * page (as it cannot be partially COW-shared), so lookup the head page.
+ */
+ if (unlikely(!PageHead(page) && PageHuge(page)))
+ page = compound_head(page);
+
+ /*
* Note that PageKsm() pages cannot be exclusive, and consequently,
* cannot get pinned.
*/
_
Patches currently in -mm which might be from david(a)redhat.com are
mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch
smaps-use-vm_normal_page_pmd-instead-of-follow_trans_huge_pmd.patch
mm-gup-handle-cont-pte-hugetlb-pages-correctly-in-gup_must_unshare-via-gup-fast.patch
mm-memory_hotplug-document-the-signal_pending-check-in-offline_pages.patch
kvm-explicitly-set-foll_honor_numa_fault-in-hva_to_pfn_slow.patch
mm-gup-dont-implicitly-set-foll_honor_numa_fault.patch
pgtable-improve-pte_protnone-comment.patch
selftest-mm-ksm_functional_tests-test-in-mmap_and_merge_range-if-anything-got-merged.patch
selftest-mm-ksm_functional_tests-add-prot_none-test.patch
--
I've reached out to you a couple of times, but I haven't heard back. I'd
appreciate a response to resolve the pending transaction.
All future correspondence should be directed to> a00728298(a)yahoo.com
Yours faithfully
Audit Manager
When a client roamed back to a node before it got time to destroy the
pending local entry (i.e. within the same originator interval) the old
global one is directly removed from hash table and left as such.
But because this entry had an extra reference taken at lookup (i.e using
batadv_tt_global_hash_find) there is no way its memory will be reclaimed
at any time causing the following memory leak:
unreferenced object 0xffff0000073c8000 (size 18560):
comm "softirq", pid 0, jiffies 4294907738 (age 228.644s)
hex dump (first 32 bytes):
06 31 ac 12 c7 7a 05 00 01 00 00 00 00 00 00 00 .1...z..........
2c ad be 08 00 80 ff ff 6c b6 be 08 00 80 ff ff ,.......l.......
backtrace:
[<00000000ee6e0ffa>] kmem_cache_alloc+0x1b4/0x300
[<000000000ff2fdbc>] batadv_tt_global_add+0x700/0xe20
[<00000000443897c7>] _batadv_tt_update_changes+0x21c/0x790
[<000000005dd90463>] batadv_tt_update_changes+0x3c/0x110
[<00000000a2d7fc57>] batadv_tt_tvlv_unicast_handler_v1+0xafc/0xe10
[<0000000011793f2a>] batadv_tvlv_containers_process+0x168/0x2b0
[<00000000b7cbe2ef>] batadv_recv_unicast_tvlv+0xec/0x1f4
[<0000000042aef1d8>] batadv_batman_skb_recv+0x25c/0x3a0
[<00000000bbd8b0a2>] __netif_receive_skb_core.isra.0+0x7a8/0xe90
[<000000004033d428>] __netif_receive_skb_one_core+0x64/0x74
[<000000000f39a009>] __netif_receive_skb+0x48/0xe0
[<00000000f2cd8888>] process_backlog+0x174/0x344
[<00000000507d6564>] __napi_poll+0x58/0x1f4
[<00000000b64ef9eb>] net_rx_action+0x504/0x590
[<00000000056fa5e4>] _stext+0x1b8/0x418
[<00000000878879d6>] run_ksoftirqd+0x74/0xa4
unreferenced object 0xffff00000bae1a80 (size 56):
comm "softirq", pid 0, jiffies 4294910888 (age 216.092s)
hex dump (first 32 bytes):
00 78 b1 0b 00 00 ff ff 0d 50 00 00 00 00 00 00 .x.......P......
00 00 00 00 00 00 00 00 50 c8 3c 07 00 00 ff ff ........P.<.....
backtrace:
[<00000000ee6e0ffa>] kmem_cache_alloc+0x1b4/0x300
[<00000000d9aaa49e>] batadv_tt_global_add+0x53c/0xe20
[<00000000443897c7>] _batadv_tt_update_changes+0x21c/0x790
[<000000005dd90463>] batadv_tt_update_changes+0x3c/0x110
[<00000000a2d7fc57>] batadv_tt_tvlv_unicast_handler_v1+0xafc/0xe10
[<0000000011793f2a>] batadv_tvlv_containers_process+0x168/0x2b0
[<00000000b7cbe2ef>] batadv_recv_unicast_tvlv+0xec/0x1f4
[<0000000042aef1d8>] batadv_batman_skb_recv+0x25c/0x3a0
[<00000000bbd8b0a2>] __netif_receive_skb_core.isra.0+0x7a8/0xe90
[<000000004033d428>] __netif_receive_skb_one_core+0x64/0x74
[<000000000f39a009>] __netif_receive_skb+0x48/0xe0
[<00000000f2cd8888>] process_backlog+0x174/0x344
[<00000000507d6564>] __napi_poll+0x58/0x1f4
[<00000000b64ef9eb>] net_rx_action+0x504/0x590
[<00000000056fa5e4>] _stext+0x1b8/0x418
[<00000000878879d6>] run_ksoftirqd+0x74/0xa4
Releasing the extra reference from batadv_tt_global_hash_find even at
roam back when batadv_tt_global_free is called fixes this memory leak.
Cc: stable(a)vger.kernel.org
Fixes: 068ee6e204e1 ("batman-adv: roaming handling mechanism redesign")
Signed-off-by: Remi Pommarel <repk(a)triplefau.lt>
---
net/batman-adv/translation-table.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 36ca31252a73..b95c36765d04 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -774,7 +774,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
if (roamed_back) {
batadv_tt_global_free(bat_priv, tt_global,
"Roaming canceled");
- tt_global = NULL;
} else {
/* The global entry has to be marked as ROAMING and
* has to be kept for consistency purpose
--
2.40.0
Luiz Capitulino reported the test_verifier test failed:
"precise: ST insn causing spi > allocated_stack".
And it was introduced by the following upstream commit:
ecdf985d7615 ("bpf: track immediate values written to stack by BPF_ST instruction")
Eduard's investigation [4] shows that test failure is not a bug, but a
difference in BPF verifier behavior between upstream, where commits
[1,2,3] by Andrii are present, and 5.10, where these commits are absent.
Backporting strategy is consistent with Eduard in kernel version 6.1 [5],
but with some conflicts in patch #1, #4 and #6 due to the bpf of 5.10
doesn't support more features. Both test_verifier and test_maps have
passed, while test_progs and test_progs-no_alu32 with no new failure
ceses.
Commits of Andrii:
[1] be2ef8161572 ("bpf: allow precision tracking for programs with subprogs")
[2] f63181b6ae79 ("bpf: stop setting precise in current state")
[3] 7a830b53c17b ("bpf: aggressively forget precise markings during state checkpointing")
Links:
[4] https://lore.kernel.org/stable/c9b10a8a551edafdfec855fbd35757c6238ad258.cam…
[5] https://lore.kernel.org/all/20230724124223.1176479-2-eddyz87@gmail.com/
Andrii Nakryiko (4):
bpf: allow precision tracking for programs with subprogs
bpf: stop setting precise in current state
bpf: aggressively forget precise markings during state checkpointing
selftests/bpf: make test_align selftest more robust
Ilya Leoshkevich (1):
selftests/bpf: Fix sk_assign on s390x
Yonghong Song (1):
selftests/bpf: Workaround verification failure for
fexit_bpf2bpf/func_replace_return_code
kernel/bpf/verifier.c | 175 ++++++++++++++++--
.../testing/selftests/bpf/prog_tests/align.c | 36 ++--
.../selftests/bpf/prog_tests/sk_assign.c | 25 ++-
.../selftests/bpf/progs/connect4_prog.c | 2 +-
.../selftests/bpf/progs/test_sk_assign.c | 11 ++
.../bpf/progs/test_sk_assign_libbpf.c | 3 +
6 files changed, 219 insertions(+), 33 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c
--
2.25.1
Here is a new batch of fixes related to MPTCP for v6.5 and older.
Patches 1 and 2 fix issues with MPTCP Join selftest when manually
launched with '-i' parameter to use 'ip mptcp' tool instead of the
dedicated one (pm_nl_ctl). The issues have been there since v5.18.
Thank you Andrea for your first contributions to MPTCP code in the
upstream kernel!
Patch 3 avoids corrupting the data stream when trying to reset
connections that have fallen back to TCP. This can happen from v6.1.
Patch 4 fixes a race when doing a disconnect() and an accept() in
parallel on a listener socket. The issue only happens in rare cases if
the user is really unlucky since a fix that landed in v6.3 but
backported up to v6.1.
Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net>
---
Andrea Claudi (2):
selftests: mptcp: join: fix 'delete and re-add' test
selftests: mptcp: join: fix 'implicit EP' test
Paolo Abeni (2):
mptcp: avoid bogus reset on fallback close
mptcp: fix disconnect vs accept race
net/mptcp/protocol.c | 2 +-
net/mptcp/protocol.h | 1 -
net/mptcp/subflow.c | 60 ++++++++++++-------------
tools/testing/selftests/net/mptcp/mptcp_join.sh | 6 ++-
4 files changed, 35 insertions(+), 34 deletions(-)
---
base-commit: 0f71c9caf26726efea674646f566984e735cc3b9
change-id: 20230803-upstream-net-20230803-misc-fixes-6-5-6046c6ca74b6
Best regards,
--
Matthieu Baerts <matthieu.baerts(a)tessares.net>
Commit 3bcbc20942db ("selftests/rseq: Play nice with binaries statically
linked against glibc 2.35+") which is now in Linus' tree introduced uses
of __weak but did nothing to ensure that a definition is provided for it
resulting in build failures for the rseq tests:
rseq.c:41:1: error: unknown type name '__weak'
__weak ptrdiff_t __rseq_offset;
^
rseq.c:41:17: error: expected ';' after top level declarator
__weak ptrdiff_t __rseq_offset;
^
;
rseq.c:42:1: error: unknown type name '__weak'
__weak unsigned int __rseq_size;
^
rseq.c:43:1: error: unknown type name '__weak'
__weak unsigned int __rseq_flags;
Fix this by using the definition from tools/include compiler.h.
Fixes: 3bcbc20942db ("selftests/rseq: Play nice with binaries statically linked against glibc 2.35+")
Signed-off-by: Mark Brown <broonie(a)kernel.org>
---
It'd be good if the KVM testing could include builds of the rseq
selftests, the KVM tests pull in code from rseq but not the build system
which has resulted in multiple failures like this.
---
tools/testing/selftests/rseq/Makefile | 4 +++-
tools/testing/selftests/rseq/rseq.c | 2 ++
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index b357ba24af06..7a957c7d459a 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -4,8 +4,10 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
CLANG_FLAGS += -no-integrated-as
endif
+top_srcdir = ../../../..
+
CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -L$(OUTPUT) -Wl,-rpath=./ \
- $(CLANG_FLAGS)
+ $(CLANG_FLAGS) -I$(top_srcdir)/tools/include
LDLIBS += -lpthread -ldl
# Own dependencies because we only want to build against 1st prerequisite, but
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index a723da253244..96e812bdf8a4 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -31,6 +31,8 @@
#include <sys/auxv.h>
#include <linux/auxvec.h>
+#include <linux/compiler.h>
+
#include "../kselftest.h"
#include "rseq.h"
---
base-commit: 5d0c230f1de8c7515b6567d9afba1f196fb4e2f4
change-id: 20230804-kselftest-rseq-build-9d537942b1de
Best regards,
--
Mark Brown <broonie(a)kernel.org>
The quilt patch titled
Subject: mm/damon/core: initialize damo_filter->list from damos_new_filter()
has been removed from the -mm tree. Its filename was
mm-damon-core-initialize-damo_filter-list-from-damos_new_filter.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/core: initialize damo_filter->list from damos_new_filter()
Date: Sat, 29 Jul 2023 20:37:32 +0000
damos_new_filter() is not initializing the list field of newly allocated
filter object. However, DAMON sysfs interface and DAMON_RECLAIM are not
initializing it after calling damos_new_filter(). As a result, accessing
uninitialized memory is possible. Actually, adding multiple DAMOS filters
via DAMON sysfs interface caused NULL pointer dereferencing. Initialize
the field just after the allocation from damos_new_filter().
Link: https://lkml.kernel.org/r/20230729203733.38949-2-sj@kernel.org
Fixes: 98def236f63c ("mm/damon/core: implement damos filter")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/core.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/damon/core.c~mm-damon-core-initialize-damo_filter-list-from-damos_new_filter
+++ a/mm/damon/core.c
@@ -273,6 +273,7 @@ struct damos_filter *damos_new_filter(en
return NULL;
filter->type = type;
filter->matching = matching;
+ INIT_LIST_HEAD(&filter->list);
return filter;
}
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-core-test-add-a-test-for-damos_new_filter.patch
mm-damon-sysfs-schemes-implement-damos-tried-total-bytes-file.patch
mm-damon-sysfs-implement-a-command-for-updating-only-schemes-tried-total-bytes.patch
selftests-damon-sysfs-test-tried_regions-total_bytes-file.patch
docs-abi-damon-update-for-tried_regions-total_bytes.patch
docs-admin-guide-mm-damon-usage-update-for-tried_regions-total_bytes.patch
mm-damon-core-introduce-address-range-type-damos-filter.patch
mm-damon-sysfs-schemes-support-address-range-type-damos-filter.patch
mm-damon-core-test-add-a-unit-test-for-__damos_filter_out.patch
selftests-damon-sysfs-test-address-range-damos-filter.patch
docs-mm-damon-design-update-for-address-range-filters.patch
docs-abi-damon-update-for-address-range-damos-filter.patch
docs-admin-guide-mm-damon-usage-update-for-address-range-type-damos-filter.patch
mm-damon-core-implement-target-type-damos-filter.patch
mm-damon-sysfs-schemes-support-target-damos-filter.patch
selftests-damon-sysfs-test-damon_target-filter.patch
docs-mm-damon-design-update-for-damon-monitoring-target-type-damos-filter.patch
docs-abi-damon-update-for-damon-monitoring-target-type-damos-filter.patch
docs-admin-guide-mm-damon-usage-update-for-damon-monitoring-target-type-damos-filter.patch
The quilt patch titled
Subject: nilfs2: fix use-after-free of nilfs_root in dirtying inodes via iput
has been removed from the -mm tree. Its filename was
nilfs2-fix-use-after-free-of-nilfs_root-in-dirtying-inodes-via-iput.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix use-after-free of nilfs_root in dirtying inodes via iput
Date: Sat, 29 Jul 2023 04:13:18 +0900
During unmount process of nilfs2, nothing holds nilfs_root structure after
nilfs2 detaches its writer in nilfs_detach_log_writer(). Previously,
nilfs_evict_inode() could cause use-after-free read for nilfs_root if
inodes are left in "garbage_list" and released by nilfs_dispose_list at
the end of nilfs_detach_log_writer(), and this bug was fixed by commit
9b5a04ac3ad9 ("nilfs2: fix use-after-free bug of nilfs_root in
nilfs_evict_inode()").
However, it turned out that there is another possibility of UAF in the
call path where mark_inode_dirty_sync() is called from iput():
nilfs_detach_log_writer()
nilfs_dispose_list()
iput()
mark_inode_dirty_sync()
__mark_inode_dirty()
nilfs_dirty_inode()
__nilfs_mark_inode_dirty()
nilfs_load_inode_block() --> causes UAF of nilfs_root struct
This can happen after commit 0ae45f63d4ef ("vfs: add support for a
lazytime mount option"), which changed iput() to call
mark_inode_dirty_sync() on its final reference if i_state has I_DIRTY_TIME
flag and i_nlink is non-zero.
This issue appears after commit 28a65b49eb53 ("nilfs2: do not write dirty
data after degenerating to read-only") when using the syzbot reproducer,
but the issue has potentially existed before.
Fix this issue by adding a "purging flag" to the nilfs structure, setting
that flag while disposing the "garbage_list" and checking it in
__nilfs_mark_inode_dirty().
Unlike commit 9b5a04ac3ad9 ("nilfs2: fix use-after-free bug of nilfs_root
in nilfs_evict_inode()"), this patch does not rely on ns_writer to
determine whether to skip operations, so as not to break recovery on
mount. The nilfs_salvage_orphan_logs routine dirties the buffer of
salvaged data before attaching the log writer, so changing
__nilfs_mark_inode_dirty() to skip the operation when ns_writer is NULL
will cause recovery write to fail. The purpose of using the cleanup-only
flag is to allow for narrowing of such conditions.
Link: https://lkml.kernel.org/r/20230728191318.33047-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+74db8b3087f293d3a13a(a)syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/000000000000b4e906060113fd63@google.com
Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option")
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org> # 4.0+
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/inode.c | 8 ++++++++
fs/nilfs2/segment.c | 2 ++
fs/nilfs2/the_nilfs.h | 2 ++
3 files changed, 12 insertions(+)
--- a/fs/nilfs2/inode.c~nilfs2-fix-use-after-free-of-nilfs_root-in-dirtying-inodes-via-iput
+++ a/fs/nilfs2/inode.c
@@ -1101,9 +1101,17 @@ int nilfs_set_file_dirty(struct inode *i
int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
struct buffer_head *ibh;
int err;
+ /*
+ * Do not dirty inodes after the log writer has been detached
+ * and its nilfs_root struct has been freed.
+ */
+ if (unlikely(nilfs_purging(nilfs)))
+ return 0;
+
err = nilfs_load_inode_block(inode, &ibh);
if (unlikely(err)) {
nilfs_warn(inode->i_sb,
--- a/fs/nilfs2/segment.c~nilfs2-fix-use-after-free-of-nilfs_root-in-dirtying-inodes-via-iput
+++ a/fs/nilfs2/segment.c
@@ -2845,6 +2845,7 @@ void nilfs_detach_log_writer(struct supe
nilfs_segctor_destroy(nilfs->ns_writer);
nilfs->ns_writer = NULL;
}
+ set_nilfs_purging(nilfs);
/* Force to free the list of dirty files */
spin_lock(&nilfs->ns_inode_lock);
@@ -2857,4 +2858,5 @@ void nilfs_detach_log_writer(struct supe
up_write(&nilfs->ns_segctor_sem);
nilfs_dispose_list(nilfs, &garbage_list, 1);
+ clear_nilfs_purging(nilfs);
}
--- a/fs/nilfs2/the_nilfs.h~nilfs2-fix-use-after-free-of-nilfs_root-in-dirtying-inodes-via-iput
+++ a/fs/nilfs2/the_nilfs.h
@@ -29,6 +29,7 @@ enum {
THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
THE_NILFS_GC_RUNNING, /* gc process is running */
THE_NILFS_SB_DIRTY, /* super block is dirty */
+ THE_NILFS_PURGING, /* disposing dirty files for cleanup */
};
/**
@@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(DISCONTINUED, discontinued)
THE_NILFS_FNS(GC_RUNNING, gc_running)
THE_NILFS_FNS(SB_DIRTY, sb_dirty)
+THE_NILFS_FNS(PURGING, purging)
/*
* Mount option operations
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
The quilt patch titled
Subject: fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
has been removed from the -mm tree. Its filename was
fs-proc-kcore-reinstate-bounce-buffer-for-kcore_text-regions.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Lorenzo Stoakes <lstoakes(a)gmail.com>
Subject: fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
Date: Mon, 31 Jul 2023 22:50:21 +0100
Some architectures do not populate the entire range categorised by
KCORE_TEXT, so we must ensure that the kernel address we read from is
valid.
Unfortunately there is no solution currently available to do so with a
purely iterator solution so reinstate the bounce buffer in this instance
so we can use copy_from_kernel_nofault() in order to avoid page faults
when regions are unmapped.
This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
the code to continue to use an iterator.
[lstoakes(a)gmail.com: correct comment to be strictly correct about reasoning]
Link: https://lkml.kernel.org/r/525a3f14-74fa-4c22-9fca-9dab4de8a0c3@lucifer.local
Link: https://lkml.kernel.org/r/20230731215021.70911-1-lstoakes@gmail.com
Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
Signed-off-by: Lorenzo Stoakes <lstoakes(a)gmail.com>
Reported-by: Jiri Olsa <olsajiri(a)gmail.com>
Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
Tested-by: Jiri Olsa <jolsa(a)kernel.org>
Tested-by: Will Deacon <will(a)kernel.org>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Liu Shixin <liushixin2(a)huawei.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Mike Galbraith <efault(a)gmx.de>
Cc: Thorsten Leemhuis <regressions(a)leemhuis.info>
Cc: Uladzislau Rezki (Sony) <urezki(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/kcore.c | 30 +++++++++++++++++++++++++++---
1 file changed, 27 insertions(+), 3 deletions(-)
--- a/fs/proc/kcore.c~fs-proc-kcore-reinstate-bounce-buffer-for-kcore_text-regions
+++ a/fs/proc/kcore.c
@@ -309,6 +309,8 @@ static void append_kcore_note(char *note
static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
{
+ struct file *file = iocb->ki_filp;
+ char *buf = file->private_data;
loff_t *fpos = &iocb->ki_pos;
size_t phdrs_offset, notes_offset, data_offset;
size_t page_offline_frozen = 1;
@@ -555,10 +557,21 @@ static ssize_t read_kcore_iter(struct ki
case KCORE_VMEMMAP:
case KCORE_TEXT:
/*
- * We use _copy_to_iter() to bypass usermode hardening
- * which would otherwise prevent this operation.
+ * Sadly we must use a bounce buffer here to be able to
+ * make use of copy_from_kernel_nofault(), as these
+ * memory regions might not always be mapped on all
+ * architectures.
*/
- if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
+ if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+ if (iov_iter_zero(tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+ /*
+ * We know the bounce buffer is safe to copy from, so
+ * use _copy_to_iter() directly.
+ */
+ } else if (_copy_to_iter(buf, tsz, iter) != tsz) {
ret = -EFAULT;
goto out;
}
@@ -595,6 +608,10 @@ static int open_kcore(struct inode *inod
if (ret)
return ret;
+ filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!filp->private_data)
+ return -ENOMEM;
+
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
@@ -605,9 +622,16 @@ static int open_kcore(struct inode *inod
return 0;
}
+static int release_kcore(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
static const struct proc_ops kcore_proc_ops = {
.proc_read_iter = read_kcore_iter,
.proc_open = open_kcore,
+ .proc_release = release_kcore,
.proc_lseek = default_llseek,
};
_
Patches currently in -mm which might be from lstoakes(a)gmail.com are
The quilt patch titled
Subject: selftests: mm: ksm: fix incorrect evaluation of parameter
has been removed from the -mm tree. Its filename was
selftests-mm-ksm-fix-incorrect-evaluation-of-parameter.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ayush Jain <ayush.jain3(a)amd.com>
Subject: selftests: mm: ksm: fix incorrect evaluation of parameter
Date: Fri, 28 Jul 2023 22:09:51 +0530
A missing break in kms_tests leads to kselftest hang when the parameter -s
is used.
In current code flow because of missing break in -s, -t parses args
spilled from -s and as -t accepts only valid values as 0,1 so any arg in
-s >1 or <0, gets in ksm_test failure
This went undetected since, before the addition of option -t, the next
case -M would immediately break out of the switch statement but that is no
longer the case
Add the missing break statement.
----Before----
./ksm_tests -H -s 100
Invalid merge type
----After----
./ksm_tests -H -s 100
Number of normal pages: 0
Number of huge pages: 50
Total size: 100 MiB
Total time: 0.401732682 s
Average speed: 248.922 MiB/s
Link: https://lkml.kernel.org/r/20230728163952.4634-1-ayush.jain3@amd.com
Fixes: 07115fcc15b4 ("selftests/mm: add new selftests for KSM")
Signed-off-by: Ayush Jain <ayush.jain3(a)amd.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: Stefan Roesch <shr(a)devkernel.io>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/ksm_tests.c | 1 +
1 file changed, 1 insertion(+)
--- a/tools/testing/selftests/mm/ksm_tests.c~selftests-mm-ksm-fix-incorrect-evaluation-of-parameter
+++ a/tools/testing/selftests/mm/ksm_tests.c
@@ -831,6 +831,7 @@ int main(int argc, char *argv[])
printf("Size must be greater than 0\n");
return KSFT_FAIL;
}
+ break;
case 't':
{
int tmp = atoi(optarg);
_
Patches currently in -mm which might be from ayush.jain3(a)amd.com are
selftests-mm-add-ksm_merge_time-tests.patch
The quilt patch titled
Subject: hugetlb: do not clear hugetlb dtor until allocating vmemmap
has been removed from the -mm tree. Its filename was
hugetlb-do-not-clear-hugetlb-dtor-until-allocating-vmemmap.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Mike Kravetz <mike.kravetz(a)oracle.com>
Subject: hugetlb: do not clear hugetlb dtor until allocating vmemmap
Date: Tue, 11 Jul 2023 15:09:41 -0700
Patch series "Fix hugetlb free path race with memory errors".
In the discussion of Jiaqi Yan's series "Improve hugetlbfs read on
HWPOISON hugepages" the race window was discovered.
https://lore.kernel.org/linux-mm/20230616233447.GB7371@monkey/
Freeing a hugetlb page back to low level memory allocators is performed
in two steps.
1) Under hugetlb lock, remove page from hugetlb lists and clear destructor
2) Outside lock, allocate vmemmap if necessary and call low level free
Between these two steps, the hugetlb page will appear as a normal
compound page. However, vmemmap for tail pages could be missing.
If a memory error occurs at this time, we could try to update page
flags non-existant page structs.
A much more detailed description is in the first patch.
The first patch addresses the race window. However, it adds a
hugetlb_lock lock/unlock cycle to every vmemmap optimized hugetlb page
free operation. This could lead to slowdowns if one is freeing a large
number of hugetlb pages.
The second path optimizes the update_and_free_pages_bulk routine to only
take the lock once in bulk operations.
The second patch is technically not a bug fix, but includes a Fixes tag
and Cc stable to avoid a performance regression. It can be combined with
the first, but was done separately make reviewing easier.
This patch (of 2):
Freeing a hugetlb page and releasing base pages back to the underlying
allocator such as buddy or cma is performed in two steps:
- remove_hugetlb_folio() is called to remove the folio from hugetlb
lists, get a ref on the page and remove hugetlb destructor. This
all must be done under the hugetlb lock. After this call, the page
can be treated as a normal compound page or a collection of base
size pages.
- update_and_free_hugetlb_folio() is called to allocate vmemmap if
needed and the free routine of the underlying allocator is called
on the resulting page. We can not hold the hugetlb lock here.
One issue with this scheme is that a memory error could occur between
these two steps. In this case, the memory error handling code treats
the old hugetlb page as a normal compound page or collection of base
pages. It will then try to SetPageHWPoison(page) on the page with an
error. If the page with error is a tail page without vmemmap, a write
error will occur when trying to set the flag.
Address this issue by modifying remove_hugetlb_folio() and
update_and_free_hugetlb_folio() such that the hugetlb destructor is not
cleared until after allocating vmemmap. Since clearing the destructor
requires holding the hugetlb lock, the clearing is done in
remove_hugetlb_folio() if the vmemmap is present. This saves a
lock/unlock cycle. Otherwise, destructor is cleared in
update_and_free_hugetlb_folio() after allocating vmemmap.
Note that this will leave hugetlb pages in a state where they are marked
free (by hugetlb specific page flag) and have a ref count. This is not
a normal state. The only code that would notice is the memory error
code, and it is set up to retry in such a case.
A subsequent patch will create a routine to do bulk processing of
vmemmap allocation. This will eliminate a lock/unlock cycle for each
hugetlb page in the case where we are freeing a large number of pages.
Link: https://lkml.kernel.org/r/20230711220942.43706-1-mike.kravetz@oracle.com
Link: https://lkml.kernel.org/r/20230711220942.43706-2-mike.kravetz@oracle.com
Fixes: ad2fa3717b74 ("mm: hugetlb: alloc the vmemmap pages associated with each HugeTLB page")
Signed-off-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Reviewed-by: Muchun Song <songmuchun(a)bytedance.com>
Tested-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: James Houghton <jthoughton(a)google.com>
Cc: Jiaqi Yan <jiaqiyan(a)google.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 75 +++++++++++++++++++++++++++++++++----------------
1 file changed, 51 insertions(+), 24 deletions(-)
--- a/mm/hugetlb.c~hugetlb-do-not-clear-hugetlb-dtor-until-allocating-vmemmap
+++ a/mm/hugetlb.c
@@ -1579,9 +1579,37 @@ static inline void destroy_compound_giga
unsigned int order) { }
#endif
+static inline void __clear_hugetlb_destructor(struct hstate *h,
+ struct folio *folio)
+{
+ lockdep_assert_held(&hugetlb_lock);
+
+ /*
+ * Very subtle
+ *
+ * For non-gigantic pages set the destructor to the normal compound
+ * page dtor. This is needed in case someone takes an additional
+ * temporary ref to the page, and freeing is delayed until they drop
+ * their reference.
+ *
+ * For gigantic pages set the destructor to the null dtor. This
+ * destructor will never be called. Before freeing the gigantic
+ * page destroy_compound_gigantic_folio will turn the folio into a
+ * simple group of pages. After this the destructor does not
+ * apply.
+ *
+ */
+ if (hstate_is_gigantic(h))
+ folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
+ else
+ folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
+}
+
/*
- * Remove hugetlb folio from lists, and update dtor so that the folio appears
- * as just a compound page.
+ * Remove hugetlb folio from lists.
+ * If vmemmap exists for the folio, update dtor so that the folio appears
+ * as just a compound page. Otherwise, wait until after allocating vmemmap
+ * to update dtor.
*
* A reference is held on the folio, except in the case of demote.
*
@@ -1612,31 +1640,19 @@ static void __remove_hugetlb_folio(struc
}
/*
- * Very subtle
- *
- * For non-gigantic pages set the destructor to the normal compound
- * page dtor. This is needed in case someone takes an additional
- * temporary ref to the page, and freeing is delayed until they drop
- * their reference.
- *
- * For gigantic pages set the destructor to the null dtor. This
- * destructor will never be called. Before freeing the gigantic
- * page destroy_compound_gigantic_folio will turn the folio into a
- * simple group of pages. After this the destructor does not
- * apply.
- *
- * This handles the case where more than one ref is held when and
- * after update_and_free_hugetlb_folio is called.
- *
- * In the case of demote we do not ref count the page as it will soon
- * be turned into a page of smaller size.
+ * We can only clear the hugetlb destructor after allocating vmemmap
+ * pages. Otherwise, someone (memory error handling) may try to write
+ * to tail struct pages.
+ */
+ if (!folio_test_hugetlb_vmemmap_optimized(folio))
+ __clear_hugetlb_destructor(h, folio);
+
+ /*
+ * In the case of demote we do not ref count the page as it will soon
+ * be turned into a page of smaller size.
*/
if (!demote)
folio_ref_unfreeze(folio, 1);
- if (hstate_is_gigantic(h))
- folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
- else
- folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
h->nr_huge_pages--;
h->nr_huge_pages_node[nid]--;
@@ -1705,6 +1721,7 @@ static void __update_and_free_hugetlb_fo
{
int i;
struct page *subpage;
+ bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio);
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
@@ -1735,6 +1752,16 @@ static void __update_and_free_hugetlb_fo
if (unlikely(folio_test_hwpoison(folio)))
folio_clear_hugetlb_hwpoison(folio);
+ /*
+ * If vmemmap pages were allocated above, then we need to clear the
+ * hugetlb destructor under the hugetlb lock.
+ */
+ if (clear_dtor) {
+ spin_lock_irq(&hugetlb_lock);
+ __clear_hugetlb_destructor(h, folio);
+ spin_unlock_irq(&hugetlb_lock);
+ }
+
for (i = 0; i < pages_per_huge_page(h); i++) {
subpage = folio_page(folio, i);
subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
_
Patches currently in -mm which might be from mike.kravetz(a)oracle.com are
The quilt patch titled
Subject: mm: memory-failure: avoid false hwpoison page mapped error info
has been removed from the -mm tree. Its filename was
mm-memory-failure-avoid-false-hwpoison-page-mapped-error-info.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Miaohe Lin <linmiaohe(a)huawei.com>
Subject: mm: memory-failure: avoid false hwpoison page mapped error info
Date: Thu, 27 Jul 2023 19:56:42 +0800
folio->_mapcount is overloaded in SLAB, so folio_mapped() has to be done
after folio_test_slab() is checked. Otherwise slab folio might be treated
as a mapped folio leading to false 'Someone maps the hwpoison page' error
info.
Link: https://lkml.kernel.org/r/20230727115643.639741-4-linmiaohe@huawei.com
Fixes: 230ac719c500 ("mm/hwpoison: don't try to unpoison containment-failed pages")
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Acked-by: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
--- a/mm/memory-failure.c~mm-memory-failure-avoid-false-hwpoison-page-mapped-error-info
+++ a/mm/memory-failure.c
@@ -2499,6 +2499,13 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
+ if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+ goto unlock_mutex;
+
+ /*
+ * Note that folio->_mapcount is overloaded in SLAB, so the simple test
+ * in folio_mapped() has to be done after folio_test_slab() is checked.
+ */
if (folio_mapped(folio)) {
unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
pfn, &unpoison_rs);
@@ -2511,9 +2518,6 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
- if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
- goto unlock_mutex;
-
ghp = get_hwpoison_page(p, MF_UNPOISON);
if (!ghp) {
if (PageHuge(p)) {
_
Patches currently in -mm which might be from linmiaohe(a)huawei.com are
mm-mm_initc-update-obsolete-comment-in-get_pfn_range_for_nid.patch
mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch
mm-memory-failure-fix-potential-page-refcnt-leak-in-memory_failure.patch
mm-memory-failure-remove-unneeded-page-state-check-in-shake_page.patch
memory-tier-use-helper-function-destroy_memory_type.patch
mm-memory-failure-remove-unneeded-inline-annotation.patch
mm-mm_initc-remove-obsolete-macro-hash_small.patch
mm-page_alloc-avoid-false-page-outside-zone-error-info.patch
memory-tier-rename-destroy_memory_type-to-put_memory_type.patch
mm-remove-obsolete-comment-above-struct-per_cpu_pages.patch
mm-memcg-minor-cleanup-for-mem_cgroup_id_max.patch
mm-memory-failure-remove-unneeded-pagehuge-check.patch
mm-memory-failure-ensure-moving-hwpoison-flag-to-the-raw-error-pages.patch
mm-memory-failure-dont-account-hwpoison_filter-filtered-pages.patch
mm-memory-failure-use-local-variable-huge-to-check-hugetlb-page.patch
mm-memory-failure-remove-unneeded-header-files.patch
mm-memory-failure-minor-cleanup-for-comments-and-codestyle.patch
mm-memory-failure-fetch-compound-head-after-extra-page-refcnt-is-held.patch
mm-memory-failure-fix-race-window-when-trying-to-get-hugetlb-folio.patch
mm-huge_memory-use-rmap_none-when-calling-page_add_anon_rmap.patch
mm-memcg-fix-obsolete-comment-above-mem_cgroup_max_reclaim_loops.patch
mm-memcg-minor-cleanup-for-mc_handle_present_pte.patch
memory-tier-use-helper-macro-__attr_rw.patch
mm-fix-obsolete-function-name-above-debug_pagealloc_enabled_static.patch
mm-mprotect-fix-obsolete-function-name-in-change_pte_range.patch
mm-memcg-fix-obsolete-function-name-in-mem_cgroup_protection.patch
mm-memory-failure-add-pageoffline-check.patch
mm-page_alloc-avoid-unneeded-alike_pages-calculation.patch
mm-memcg-update-obsolete-comment-above-parent_mem_cgroup.patch
mm-page_alloc-remove-unneeded-variable-base.patch
mm-memcg-fix-wrong-function-name-above-obj_cgroup_charge_zswap.patch
The quilt patch titled
Subject: mm: memory-failure: fix potential unexpected return value from unpoison_memory()
has been removed from the -mm tree. Its filename was
mm-memory-failure-fix-potential-unexpected-return-value-from-unpoison_memory.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Miaohe Lin <linmiaohe(a)huawei.com>
Subject: mm: memory-failure: fix potential unexpected return value from unpoison_memory()
Date: Thu, 27 Jul 2023 19:56:41 +0800
If unpoison_memory() fails to clear page hwpoisoned flag, return value ret
is expected to be -EBUSY. But when get_hwpoison_page() returns 1 and
fails to clear page hwpoisoned flag due to races, return value will be
unexpected 1 leading to users being confused. And there's a code smell
that the variable "ret" is used not only to save the return value of
unpoison_memory(), but also the return value from get_hwpoison_page().
Make a further cleanup by using another auto-variable solely to save the
return value of get_hwpoison_page() as suggested by Naoya.
Link: https://lkml.kernel.org/r/20230727115643.639741-3-linmiaohe@huawei.com
Fixes: bf181c582588 ("mm/hwpoison: fix unpoison_memory()")
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
--- a/mm/memory-failure.c~mm-memory-failure-fix-potential-unexpected-return-value-from-unpoison_memory
+++ a/mm/memory-failure.c
@@ -2466,7 +2466,7 @@ int unpoison_memory(unsigned long pfn)
{
struct folio *folio;
struct page *p;
- int ret = -EBUSY;
+ int ret = -EBUSY, ghp;
unsigned long count = 1;
bool huge = false;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -2514,29 +2514,28 @@ int unpoison_memory(unsigned long pfn)
if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
goto unlock_mutex;
- ret = get_hwpoison_page(p, MF_UNPOISON);
- if (!ret) {
+ ghp = get_hwpoison_page(p, MF_UNPOISON);
+ if (!ghp) {
if (PageHuge(p)) {
huge = true;
count = folio_free_raw_hwp(folio, false);
- if (count == 0) {
- ret = -EBUSY;
+ if (count == 0)
goto unlock_mutex;
- }
}
ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY;
- } else if (ret < 0) {
- if (ret == -EHWPOISON) {
+ } else if (ghp < 0) {
+ if (ghp == -EHWPOISON) {
ret = put_page_back_buddy(p) ? 0 : -EBUSY;
- } else
+ } else {
+ ret = ghp;
unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
pfn, &unpoison_rs);
+ }
} else {
if (PageHuge(p)) {
huge = true;
count = folio_free_raw_hwp(folio, false);
if (count == 0) {
- ret = -EBUSY;
folio_put(folio);
goto unlock_mutex;
}
_
Patches currently in -mm which might be from linmiaohe(a)huawei.com are
mm-mm_initc-update-obsolete-comment-in-get_pfn_range_for_nid.patch
mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch
mm-memory-failure-fix-potential-page-refcnt-leak-in-memory_failure.patch
mm-memory-failure-remove-unneeded-page-state-check-in-shake_page.patch
memory-tier-use-helper-function-destroy_memory_type.patch
mm-memory-failure-remove-unneeded-inline-annotation.patch
mm-mm_initc-remove-obsolete-macro-hash_small.patch
mm-page_alloc-avoid-false-page-outside-zone-error-info.patch
memory-tier-rename-destroy_memory_type-to-put_memory_type.patch
mm-remove-obsolete-comment-above-struct-per_cpu_pages.patch
mm-memcg-minor-cleanup-for-mem_cgroup_id_max.patch
mm-memory-failure-remove-unneeded-pagehuge-check.patch
mm-memory-failure-ensure-moving-hwpoison-flag-to-the-raw-error-pages.patch
mm-memory-failure-dont-account-hwpoison_filter-filtered-pages.patch
mm-memory-failure-use-local-variable-huge-to-check-hugetlb-page.patch
mm-memory-failure-remove-unneeded-header-files.patch
mm-memory-failure-minor-cleanup-for-comments-and-codestyle.patch
mm-memory-failure-fetch-compound-head-after-extra-page-refcnt-is-held.patch
mm-memory-failure-fix-race-window-when-trying-to-get-hugetlb-folio.patch
mm-huge_memory-use-rmap_none-when-calling-page_add_anon_rmap.patch
mm-memcg-fix-obsolete-comment-above-mem_cgroup_max_reclaim_loops.patch
mm-memcg-minor-cleanup-for-mc_handle_present_pte.patch
memory-tier-use-helper-macro-__attr_rw.patch
mm-fix-obsolete-function-name-above-debug_pagealloc_enabled_static.patch
mm-mprotect-fix-obsolete-function-name-in-change_pte_range.patch
mm-memcg-fix-obsolete-function-name-in-mem_cgroup_protection.patch
mm-memory-failure-add-pageoffline-check.patch
mm-page_alloc-avoid-unneeded-alike_pages-calculation.patch
mm-memcg-update-obsolete-comment-above-parent_mem_cgroup.patch
mm-page_alloc-remove-unneeded-variable-base.patch
mm-memcg-fix-wrong-function-name-above-obj_cgroup_charge_zswap.patch
The quilt patch titled
Subject: mm/swapfile: fix wrong swap entry type for hwpoisoned swapcache page
has been removed from the -mm tree. Its filename was
mm-swapfile-fix-wrong-swap-entry-type-for-hwpoisoned-swapcache-page.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Miaohe Lin <linmiaohe(a)huawei.com>
Subject: mm/swapfile: fix wrong swap entry type for hwpoisoned swapcache page
Date: Thu, 27 Jul 2023 19:56:40 +0800
Patch series "A few fixup patches for mm", v2.
This series contains a few fixup patches to fix potential unexpected
return value, fix wrong swap entry type for hwpoisoned swapcache page and
so on. More details can be found in the respective changelogs.
This patch (of 3):
Hwpoisoned dirty swap cache page is kept in the swap cache and there's
simple interception code in do_swap_page() to catch it. But when trying
to swapoff, unuse_pte() will wrongly install a general sense of "future
accesses are invalid" swap entry for hwpoisoned swap cache page due to
unaware of such type of page. The user will receive SIGBUS signal without
expected BUS_MCEERR_AR payload. BTW, typo 'hwposioned' is fixed.
Link: https://lkml.kernel.org/r/20230727115643.639741-1-linmiaohe@huawei.com
Link: https://lkml.kernel.org/r/20230727115643.639741-2-linmiaohe@huawei.com
Fixes: 6b970599e807 ("mm: hwpoison: support recovery from ksm_might_need_to_copy()")
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/ksm.c | 2 ++
mm/swapfile.c | 8 ++++----
2 files changed, 6 insertions(+), 4 deletions(-)
--- a/mm/ksm.c~mm-swapfile-fix-wrong-swap-entry-type-for-hwpoisoned-swapcache-page
+++ a/mm/ksm.c
@@ -2784,6 +2784,8 @@ struct page *ksm_might_need_to_copy(stru
anon_vma->root == vma->anon_vma->root) {
return page; /* still no need to copy it */
}
+ if (PageHWPoison(page))
+ return ERR_PTR(-EHWPOISON);
if (!PageUptodate(page))
return page; /* let do_swap_page report the error */
--- a/mm/swapfile.c~mm-swapfile-fix-wrong-swap-entry-type-for-hwpoisoned-swapcache-page
+++ a/mm/swapfile.c
@@ -1746,7 +1746,7 @@ static int unuse_pte(struct vm_area_stru
struct page *swapcache;
spinlock_t *ptl;
pte_t *pte, new_pte, old_pte;
- bool hwposioned = false;
+ bool hwpoisoned = PageHWPoison(page);
int ret = 1;
swapcache = page;
@@ -1754,7 +1754,7 @@ static int unuse_pte(struct vm_area_stru
if (unlikely(!page))
return -ENOMEM;
else if (unlikely(PTR_ERR(page) == -EHWPOISON))
- hwposioned = true;
+ hwpoisoned = true;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte),
@@ -1765,11 +1765,11 @@ static int unuse_pte(struct vm_area_stru
old_pte = ptep_get(pte);
- if (unlikely(hwposioned || !PageUptodate(page))) {
+ if (unlikely(hwpoisoned || !PageUptodate(page))) {
swp_entry_t swp_entry;
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
- if (hwposioned) {
+ if (hwpoisoned) {
swp_entry = make_hwpoison_entry(swapcache);
page = swapcache;
} else {
_
Patches currently in -mm which might be from linmiaohe(a)huawei.com are
mm-mm_initc-update-obsolete-comment-in-get_pfn_range_for_nid.patch
mm-memory-failure-fix-unexpected-return-value-in-soft_offline_page.patch
mm-memory-failure-fix-potential-page-refcnt-leak-in-memory_failure.patch
mm-memory-failure-remove-unneeded-page-state-check-in-shake_page.patch
memory-tier-use-helper-function-destroy_memory_type.patch
mm-memory-failure-remove-unneeded-inline-annotation.patch
mm-mm_initc-remove-obsolete-macro-hash_small.patch
mm-page_alloc-avoid-false-page-outside-zone-error-info.patch
memory-tier-rename-destroy_memory_type-to-put_memory_type.patch
mm-remove-obsolete-comment-above-struct-per_cpu_pages.patch
mm-memcg-minor-cleanup-for-mem_cgroup_id_max.patch
mm-memory-failure-remove-unneeded-pagehuge-check.patch
mm-memory-failure-ensure-moving-hwpoison-flag-to-the-raw-error-pages.patch
mm-memory-failure-dont-account-hwpoison_filter-filtered-pages.patch
mm-memory-failure-use-local-variable-huge-to-check-hugetlb-page.patch
mm-memory-failure-remove-unneeded-header-files.patch
mm-memory-failure-minor-cleanup-for-comments-and-codestyle.patch
mm-memory-failure-fetch-compound-head-after-extra-page-refcnt-is-held.patch
mm-memory-failure-fix-race-window-when-trying-to-get-hugetlb-folio.patch
mm-huge_memory-use-rmap_none-when-calling-page_add_anon_rmap.patch
mm-memcg-fix-obsolete-comment-above-mem_cgroup_max_reclaim_loops.patch
mm-memcg-minor-cleanup-for-mc_handle_present_pte.patch
memory-tier-use-helper-macro-__attr_rw.patch
mm-fix-obsolete-function-name-above-debug_pagealloc_enabled_static.patch
mm-mprotect-fix-obsolete-function-name-in-change_pte_range.patch
mm-memcg-fix-obsolete-function-name-in-mem_cgroup_protection.patch
mm-memory-failure-add-pageoffline-check.patch
mm-page_alloc-avoid-unneeded-alike_pages-calculation.patch
mm-memcg-update-obsolete-comment-above-parent_mem_cgroup.patch
mm-page_alloc-remove-unneeded-variable-base.patch
mm-memcg-fix-wrong-function-name-above-obj_cgroup_charge_zswap.patch
The quilt patch titled
Subject: radix tree test suite: fix incorrect allocation size for pthreads
has been removed from the -mm tree. Its filename was
radix-tree-test-suite-fix-incorrect-allocation-size-for-pthreads.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Colin Ian King <colin.i.king(a)gmail.com>
Subject: radix tree test suite: fix incorrect allocation size for pthreads
Date: Thu, 27 Jul 2023 17:09:30 +0100
Currently the pthread allocation for each array item is based on the size
of a pthread_t pointer and should be the size of the pthread_t structure,
so the allocation is under-allocating the correct size. Fix this by using
the size of each element in the pthreads array.
Static analysis cppcheck reported:
tools/testing/radix-tree/regression1.c:180:2: warning: Size of pointer
'threads' used instead of size of its data. [pointerSize]
Link: https://lkml.kernel.org/r/20230727160930.632674-1-colin.i.king@gmail.com
Fixes: 1366c37ed84b ("radix tree test harness")
Signed-off-by: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Konstantin Khlebnikov <koct9i(a)gmail.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/radix-tree/regression1.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/tools/testing/radix-tree/regression1.c~radix-tree-test-suite-fix-incorrect-allocation-size-for-pthreads
+++ a/tools/testing/radix-tree/regression1.c
@@ -177,7 +177,7 @@ void regression1_test(void)
nr_threads = 2;
pthread_barrier_init(&worker_barrier, NULL, nr_threads);
- threads = malloc(nr_threads * sizeof(pthread_t *));
+ threads = malloc(nr_threads * sizeof(*threads));
for (i = 0; i < nr_threads; i++) {
arg = i;
_
Patches currently in -mm which might be from colin.i.king(a)gmail.com are
fs-hfsplus-make-extend-error-rate-limited.patch
The quilt patch titled
Subject: zsmalloc: fix races between modifications of fullness and isolated
has been removed from the -mm tree. Its filename was
zsmalloc-fix-races-between-modifications-of-fullness-and-isolated.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Andrew Yang <andrew.yang(a)mediatek.com>
Subject: zsmalloc: fix races between modifications of fullness and isolated
Date: Fri, 21 Jul 2023 14:37:01 +0800
We encountered many kernel exceptions of VM_BUG_ON(zspage->isolated ==
0) in dec_zspage_isolation() and BUG_ON(!pages[1]) in zs_unmap_object()
lately. This issue only occurs when migration and reclamation occur at
the same time.
With our memory stress test, we can reproduce this issue several times
a day. We have no idea why no one else encountered this issue. BTW,
we switched to the new kernel version with this defect a few months
ago.
Since fullness and isolated share the same unsigned int, modifications of
them should be protected by the same lock.
[andrew.yang(a)mediatek.com: move comment]
Link: https://lkml.kernel.org/r/20230727062910.6337-1-andrew.yang@mediatek.com
Link: https://lkml.kernel.org/r/20230721063705.11455-1-andrew.yang@mediatek.com
Fixes: c4549b871102 ("zsmalloc: remove zspage isolation for migration")
Signed-off-by: Andrew Yang <andrew.yang(a)mediatek.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Cc: AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com>
Cc: Matthias Brugger <matthias.bgg(a)gmail.com>
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/zsmalloc.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
--- a/mm/zsmalloc.c~zsmalloc-fix-races-between-modifications-of-fullness-and-isolated
+++ a/mm/zsmalloc.c
@@ -1798,6 +1798,7 @@ static void replace_sub_page(struct size
static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
{
+ struct zs_pool *pool;
struct zspage *zspage;
/*
@@ -1807,9 +1808,10 @@ static bool zs_page_isolate(struct page
VM_BUG_ON_PAGE(PageIsolated(page), page);
zspage = get_zspage(page);
- migrate_write_lock(zspage);
+ pool = zspage->pool;
+ spin_lock(&pool->lock);
inc_zspage_isolation(zspage);
- migrate_write_unlock(zspage);
+ spin_unlock(&pool->lock);
return true;
}
@@ -1875,12 +1877,12 @@ static int zs_page_migrate(struct page *
kunmap_atomic(s_addr);
replace_sub_page(class, zspage, newpage, page);
+ dec_zspage_isolation(zspage);
/*
* Since we complete the data copy and set up new zspage structure,
* it's okay to release the pool's lock.
*/
spin_unlock(&pool->lock);
- dec_zspage_isolation(zspage);
migrate_write_unlock(zspage);
get_page(newpage);
@@ -1897,14 +1899,16 @@ static int zs_page_migrate(struct page *
static void zs_page_putback(struct page *page)
{
+ struct zs_pool *pool;
struct zspage *zspage;
VM_BUG_ON_PAGE(!PageIsolated(page), page);
zspage = get_zspage(page);
- migrate_write_lock(zspage);
+ pool = zspage->pool;
+ spin_lock(&pool->lock);
dec_zspage_isolation(zspage);
- migrate_write_unlock(zspage);
+ spin_unlock(&pool->lock);
}
static const struct movable_operations zsmalloc_mops = {
_
Patches currently in -mm which might be from andrew.yang(a)mediatek.com are
fs-drop_caches-draining-pages-before-dropping-caches.patch
During recent vma locking patch reviews Linus and Jann Horn noted a number
of issues with vma locking and suggested improvements:
1. walk_page_range() does not have ability to write-lock a vma during the
walk when it's done under mmap_write_lock. For example s390_reset_cmma().
2. Vma locking is hidden inside vm_flags modifiers and is hard to follow.
Suggestion is to change vm_flags_reset{_once} to assert that vma is
write-locked and require an explicit locking.
3. Same issue with vma_prepare() hiding vma locking.
4. In userfaultfd vm_flags are modified after vma->vm_userfaultfd_ctx and
page faults can operate on a context while it's changed.
5. do_brk_flags() and __install_special_mapping() not locking a newly
created vma before adding it into the mm. While not strictly a problem,
this is fragile if vma is modified after insertion, as in the
mmap_region() case which was recently fixed. Suggestion is to always lock
a new vma before inserting it and making it visible to page faults.
6. vma_assert_write_locked() for CONFIG_PER_VMA_LOCK=n would benefit from
being mmap_assert_write_locked() instead of no-op and then any place which
operates on a vma and calls mmap_assert_write_locked() can be converted
into vma_assert_write_locked().
I CC'ed stable only on the first patch because others are cleanups and the
bug in userfaultfd does not affect stable (lock_vma_under_rcu prevents
uffds from being handled under vma lock protection). However I would be
happy if the whole series is merged into stable 6.4 since it makes vma
locking more maintainable.
The patches apply cleanly over Linus' ToT and will conflict when applied
over mm-unstable due to missing [1]. The conflict can be easily resolved
by ignoring conflicting deletions but probably simpler to take [1] into
mm-unstable and avoid later conflict.
[1] commit 6c21e066f925 ("mm/mempolicy: Take VMA lock before replacing policy")
Changes since v3:
- changed vma locking in vma_merge to avoid locking prev when not
necessary, per Liam
Suren Baghdasaryan (6):
mm: enable page walking API to lock vmas during the walk
mm: for !CONFIG_PER_VMA_LOCK equate write lock assertion for vma and
mmap
mm: replace mmap with vma write lock assertions when operating on a
vma
mm: lock vma explicitly before doing vm_flags_reset and
vm_flags_reset_once
mm: always lock new vma before inserting into vma tree
mm: move vma locking out of vma_prepare and dup_anon_vma
arch/powerpc/kvm/book3s_hv_uvmem.c | 1 +
arch/powerpc/mm/book3s64/subpage_prot.c | 1 +
arch/riscv/mm/pageattr.c | 1 +
arch/s390/mm/gmap.c | 5 ++++
fs/proc/task_mmu.c | 5 ++++
fs/userfaultfd.c | 6 +++++
include/linux/mm.h | 13 ++++++---
include/linux/pagewalk.h | 11 ++++++++
mm/damon/vaddr.c | 2 ++
mm/hmm.c | 1 +
mm/hugetlb.c | 2 +-
mm/khugepaged.c | 5 ++--
mm/ksm.c | 25 ++++++++++-------
mm/madvise.c | 8 +++---
mm/memcontrol.c | 2 ++
mm/memory-failure.c | 1 +
mm/memory.c | 2 +-
mm/mempolicy.c | 22 +++++++++------
mm/migrate_device.c | 1 +
mm/mincore.c | 1 +
mm/mlock.c | 4 ++-
mm/mmap.c | 33 +++++++++++++++--------
mm/mprotect.c | 2 ++
mm/pagewalk.c | 36 ++++++++++++++++++++++---
mm/vmscan.c | 1 +
25 files changed, 148 insertions(+), 43 deletions(-)
--
2.41.0.585.gd2178a4bd4-goog
The patch titled
Subject: mm/gup: reintroduce FOLL_NUMA as FOLL_HONOR_NUMA_FAULT
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/gup: reintroduce FOLL_NUMA as FOLL_HONOR_NUMA_FAULT
Date: Thu, 3 Aug 2023 16:32:02 +0200
Unfortunately commit 474098edac26 ("mm/gup: replace FOLL_NUMA by
gup_can_follow_protnone()") missed that follow_page() and
follow_trans_huge_pmd() never implicitly set FOLL_NUMA because they really
don't want to fail on PROT_NONE-mapped pages -- either due to NUMA hinting
or due to inaccessible (PROT_NONE) VMAs.
As spelled out in commit 0b9d705297b2 ("mm: numa: Support NUMA hinting
page faults from gup/gup_fast"): "Other follow_page callers like KSM
should not use FOLL_NUMA, or they would fail to get the pages if they use
follow_page instead of get_user_pages."
liubo reported [1] that smaps_rollup results are imprecise, because they
miss accounting of pages that are mapped PROT_NONE. Further, it's easy to
reproduce that KSM no longer works on inaccessible VMAs on x86-64, because
pte_protnone()/pmd_protnone() also indictaes "true" in inaccessible VMAs,
and follow_page() refuses to return such pages right now.
As KVM really depends on these NUMA hinting faults, removing the
pte_protnone()/pmd_protnone() handling in GUP code completely is not
really an option.
To fix the issues at hand, let's revive FOLL_NUMA as FOLL_HONOR_NUMA_FAULT
to restore the original behavior for now and add better comments.
Set FOLL_HONOR_NUMA_FAULT independent of FOLL_FORCE in
is_valid_gup_args(), to add that flag for all external GUP users.
Note that there are three GUP-internal __get_user_pages() users that don't
end up calling is_valid_gup_args() and consequently won't get
FOLL_HONOR_NUMA_FAULT set.
1) get_dump_page(): we really don't want to handle NUMA hinting
faults. It specifies FOLL_FORCE and wouldn't have honored NUMA
hinting faults already.
2) populate_vma_page_range(): we really don't want to handle NUMA hinting
faults. It specifies FOLL_FORCE on accessible VMAs, so it wouldn't have
honored NUMA hinting faults already.
3) faultin_vma_page_range(): we similarly don't want to handle NUMA
hinting faults.
To make the combination of FOLL_FORCE and FOLL_HONOR_NUMA_FAULT work in
inaccessible VMAs properly, we have to perform VMA accessibility checks in
gup_can_follow_protnone().
As GUP-fast should reject such pages either way in
pte_access_permitted()/pmd_access_permitted() -- for example on x86-64 and
arm64 that both implement pte_protnone() -- let's just always fallback to
ordinary GUP when stumbling over pte_protnone()/pmd_protnone().
As Linus notes [2], honoring NUMA faults might only make sense for
selected GUP users.
So we should really see if we can instead let relevant GUP callers specify
it manually, and not trigger NUMA hinting faults from GUP as default.
Prepare for that by making FOLL_HONOR_NUMA_FAULT an external GUP flag and
adding appropriate documenation.
While at it, remove a stale comment from follow_trans_huge_pmd(): That
comment for pmd_protnone() was added in commit 2b4847e73004 ("mm: numa:
serialise parallel get_user_page against THP migration"), which noted:
THP does not unmap pages due to a lack of support for migration
entries at a PMD level. This allows races with get_user_pages
Nowadays, we do have PMD migration entries, so the comment no longer
applies. Let's drop it.
[1] https://lore.kernel.org/r/20230726073409.631838-1-liubo254@huawei.com
[2] https://lore.kernel.org/r/CAHk-=wgRiP_9X0rRdZKT8nhemZGNateMtb366t37d8-x7VRs…
Link: https://lkml.kernel.org/r/20230803143208.383663-2-david@redhat.com
Fixes: 474098edac26 ("mm/gup: replace FOLL_NUMA by gup_can_follow_protnone()")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reported-by: liubo <liubo254(a)huawei.com>
Closes: https://lore.kernel.org/r/20230726073409.631838-1-liubo254@huawei.com
Reported-by: Peter Xu <peterx(a)redhat.com>
Closes: https://lore.kernel.org/all/ZMKJjDaqZ7FW0jfe@x1n/
Acked-by: Mel Gorman <mgorman(a)techsingularity.net>
Acked-by: Peter Xu <peterx(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jason Gunthorpe <jgg(a)ziepe.ca>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Mel Gorman <mgorman(a)suse.de>
Cc: Paolo Bonzini <pbonzini(a)redhat.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/mm.h | 21 +++++++++++++++------
include/linux/mm_types.h | 9 +++++++++
mm/gup.c | 30 ++++++++++++++++++++++++------
mm/huge_memory.c | 3 +--
4 files changed, 49 insertions(+), 14 deletions(-)
--- a/include/linux/mm.h~mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault
+++ a/include/linux/mm.h
@@ -3421,15 +3421,24 @@ static inline int vm_fault_to_errno(vm_f
* Indicates whether GUP can follow a PROT_NONE mapped page, or whether
* a (NUMA hinting) fault is required.
*/
-static inline bool gup_can_follow_protnone(unsigned int flags)
+static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,
+ unsigned int flags)
{
/*
- * FOLL_FORCE has to be able to make progress even if the VMA is
- * inaccessible. Further, FOLL_FORCE access usually does not represent
- * application behaviour and we should avoid triggering NUMA hinting
- * faults.
+ * If callers don't want to honor NUMA hinting faults, no need to
+ * determine if we would actually have to trigger a NUMA hinting fault.
*/
- return flags & FOLL_FORCE;
+ if (!(flags & FOLL_HONOR_NUMA_FAULT))
+ return true;
+
+ /*
+ * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs.
+ *
+ * Requiring a fault here even for inaccessible VMAs would mean that
+ * FOLL_FORCE cannot make any progress, because handle_mm_fault()
+ * refuses to process NUMA hinting faults in inaccessible VMAs.
+ */
+ return !vma_is_accessible(vma);
}
typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
--- a/include/linux/mm_types.h~mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault
+++ a/include/linux/mm_types.h
@@ -1286,6 +1286,15 @@ enum {
FOLL_PCI_P2PDMA = 1 << 10,
/* allow interrupts from generic signals */
FOLL_INTERRUPTIBLE = 1 << 11,
+ /*
+ * Always honor (trigger) NUMA hinting faults.
+ *
+ * FOLL_WRITE implicitly honors NUMA hinting faults because a
+ * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE
+ * apply). get_user_pages_fast_only() always implicitly honors NUMA
+ * hinting faults.
+ */
+ FOLL_HONOR_NUMA_FAULT = 1 << 12,
/* See also internal only FOLL flags in mm/internal.h */
};
--- a/mm/gup.c~mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault
+++ a/mm/gup.c
@@ -597,7 +597,7 @@ static struct page *follow_page_pte(stru
pte = ptep_get(ptep);
if (!pte_present(pte))
goto no_page;
- if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+ if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
goto no_page;
page = vm_normal_page(vma, address, pte);
@@ -714,7 +714,7 @@ static struct page *follow_pmd_mask(stru
if (likely(!pmd_trans_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
- if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
+ if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
return no_page_table(vma, flags);
ptl = pmd_lock(mm, pmd);
@@ -851,6 +851,10 @@ struct page *follow_page(struct vm_area_
if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
return NULL;
+ /*
+ * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect
+ * to fail on PROT_NONE-mapped pages.
+ */
page = follow_page_mask(vma, address, foll_flags, &ctx);
if (ctx.pgmap)
put_dev_pagemap(ctx.pgmap);
@@ -2227,6 +2231,13 @@ static bool is_valid_gup_args(struct pag
gup_flags |= FOLL_UNLOCKABLE;
}
+ /*
+ * For now, always trigger NUMA hinting faults. Some GUP users like
+ * KVM require the hint to be as the calling context of GUP is
+ * functionally similar to a memory reference from task context.
+ */
+ gup_flags |= FOLL_HONOR_NUMA_FAULT;
+
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
@@ -2551,7 +2562,14 @@ static int gup_pte_range(pmd_t pmd, pmd_
struct page *page;
struct folio *folio;
- if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+ /*
+ * Always fallback to ordinary GUP on PROT_NONE-mapped pages:
+ * pte_access_permitted() better should reject these pages
+ * either way: otherwise, GUP-fast might succeed in
+ * cases where ordinary GUP would fail due to VMA access
+ * permissions.
+ */
+ if (pte_protnone(pte))
goto pte_unmap;
if (!pte_access_permitted(pte, flags & FOLL_WRITE))
@@ -2970,8 +2988,8 @@ static int gup_pmd_range(pud_t *pudp, pu
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
pmd_devmap(pmd))) {
- if (pmd_protnone(pmd) &&
- !gup_can_follow_protnone(flags))
+ /* See gup_pte_range() */
+ if (pmd_protnone(pmd))
return 0;
if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
@@ -3151,7 +3169,7 @@ static int internal_get_user_pages_fast(
if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
FOLL_FORCE | FOLL_PIN | FOLL_GET |
FOLL_FAST_ONLY | FOLL_NOFAULT |
- FOLL_PCI_P2PDMA)))
+ FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT)))
return -EINVAL;
if (gup_flags & FOLL_PIN)
--- a/mm/huge_memory.c~mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault
+++ a/mm/huge_memory.c
@@ -1467,8 +1467,7 @@ struct page *follow_trans_huge_pmd(struc
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
return ERR_PTR(-EFAULT);
- /* Full NUMA hinting faults to serialise migration in fault paths */
- if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))
+ if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))
return NULL;
if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))
_
Patches currently in -mm which might be from david(a)redhat.com are
mm-gup-reintroduce-foll_numa-as-foll_honor_numa_fault.patch
smaps-use-vm_normal_page_pmd-instead-of-follow_trans_huge_pmd.patch
mm-memory_hotplug-document-the-signal_pending-check-in-offline_pages.patch
kvm-explicitly-set-foll_honor_numa_fault-in-hva_to_pfn_slow.patch
mm-gup-dont-implicitly-set-foll_honor_numa_fault.patch
pgtable-improve-pte_protnone-comment.patch
selftest-mm-ksm_functional_tests-test-in-mmap_and_merge_range-if-anything-got-merged.patch
selftest-mm-ksm_functional_tests-add-prot_none-test.patch
We should be checking to see if async flips are supported in
amdgpu_dm_atomic_check() (i.e. not dm_crtc_helper_atomic_check()). Also,
async flipping isn't supported if a plane's framebuffer changes memory
domains during an atomic commit. So, move the check from
dm_crtc_helper_atomic_check() to amdgpu_dm_atomic_check() and check if
the memory domain has changed in amdgpu_dm_atomic_check().
Cc: stable(a)vger.kernel.org
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2733
Fixes: 3f86b60691e6 ("drm/amd/display: only accept async flips for fast updates")
Signed-off-by: Hamza Mahfooz <hamza.mahfooz(a)amd.com>
---
v2: link issue and revert back to the old way of setting update_type.
---
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 24 ++++++++++++++++---
.../amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 12 ----------
2 files changed, 21 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 32fb551862b0..1d3afab5bc85 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -8086,10 +8086,12 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* fast updates.
*/
if (crtc->state->async_flip &&
- acrtc_state->update_type != UPDATE_TYPE_FAST)
+ (acrtc_state->update_type != UPDATE_TYPE_FAST ||
+ get_mem_type(old_plane_state->fb) != get_mem_type(fb)))
drm_warn_once(state->dev,
"[PLANE:%d:%s] async flip with non-fast update\n",
plane->base.id, plane->name);
+
bundle->flip_addrs[planes_count].flip_immediate =
crtc->state->async_flip &&
acrtc_state->update_type == UPDATE_TYPE_FAST &&
@@ -10050,6 +10052,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
/* Remove exiting planes if they are modified */
for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
+ if (old_plane_state->fb && new_plane_state->fb &&
+ get_mem_type(old_plane_state->fb) !=
+ get_mem_type(new_plane_state->fb))
+ lock_and_validation_needed = true;
+
ret = dm_update_plane_state(dc, state, plane,
old_plane_state,
new_plane_state,
@@ -10297,9 +10304,20 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
struct dm_crtc_state *dm_new_crtc_state =
to_dm_crtc_state(new_crtc_state);
+ /*
+ * Only allow async flips for fast updates that don't change
+ * the FB pitch, the DCC state, rotation, etc.
+ */
+ if (new_crtc_state->async_flip && lock_and_validation_needed) {
+ drm_dbg_atomic(crtc->dev,
+ "[CRTC:%d:%s] async flips are only supported for fast updates\n",
+ crtc->base.id, crtc->name);
+ ret = -EINVAL;
+ goto fail;
+ }
+
dm_new_crtc_state->update_type = lock_and_validation_needed ?
- UPDATE_TYPE_FULL :
- UPDATE_TYPE_FAST;
+ UPDATE_TYPE_FULL : UPDATE_TYPE_FAST;
}
/* Must be success */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 30d4c6fd95f5..440fc0869a34 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -398,18 +398,6 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
return -EINVAL;
}
- /*
- * Only allow async flips for fast updates that don't change the FB
- * pitch, the DCC state, rotation, etc.
- */
- if (crtc_state->async_flip &&
- dm_crtc_state->update_type != UPDATE_TYPE_FAST) {
- drm_dbg_atomic(crtc->dev,
- "[CRTC:%d:%s] async flips are only supported for fast updates\n",
- crtc->base.id, crtc->name);
- return -EINVAL;
- }
-
/* In some use cases, like reset, no stream is attached */
if (!dm_crtc_state->stream)
return 0;
--
2.41.0
When we added support for streaming mode SVE there were several missed
cases around ptrace, address them. Some could be seen on systems which
do physically have SVE, others would only impact SME only systems. The
Fixes: tag is a bit conservative for the SME only cases but it seems
like the safest and clearest choice.
Signed-off-by: Mark Brown <broonie(a)kernel.org>
---
Mark Brown (3):
arm64/ptrace: Don't enable SVE when setting streaming SVE
arm64/fpsimd: Sync FPSIMD state with SVE for SME only systems
arm64/fpsimd: Sync and zero pad FPSIMD state for streaming SVE
arch/arm64/kernel/fpsimd.c | 7 ++++---
arch/arm64/kernel/ptrace.c | 8 +++++---
2 files changed, 9 insertions(+), 6 deletions(-)
---
base-commit: 5d0c230f1de8c7515b6567d9afba1f196fb4e2f4
change-id: 20230802-arm64-fix-ptrace-ssve-no-sve-915863197925
Best regards,
--
Mark Brown <broonie(a)kernel.org>
[ Upstream commit 4acfe3dfde685a5a9eaec5555351918e2d7266a1 ]
Dan Carpenter spotted a race condition in a couple of situations like
these in the test_firmware driver:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
u8 val;
int ret;
ret = kstrtou8(buf, 10, &val);
if (ret)
return ret;
mutex_lock(&test_fw_mutex);
*(u8 *)cfg = val;
mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
static ssize_t config_num_requests_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int rc;
mutex_lock(&test_fw_mutex);
if (test_fw_config->reqs) {
pr_err("Must call release_all_firmware prior to changing config\n");
rc = -EINVAL;
mutex_unlock(&test_fw_mutex);
goto out;
}
mutex_unlock(&test_fw_mutex);
// NOTE: HERE is the race!!! Function can be preempted!
// test_fw_config->reqs can change between the release of
// the lock about and acquire of the lock in the
// test_dev_config_update_u8()
rc = test_dev_config_update_u8(buf, count,
&test_fw_config->num_requests);
out:
return rc;
}
static ssize_t config_read_fw_idx_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
return test_dev_config_update_u8(buf, count,
&test_fw_config->read_fw_idx);
}
The function test_dev_config_update_u8() is called from both the locked
and the unlocked context, function config_num_requests_store() and
config_read_fw_idx_store() which can both be called asynchronously as
they are driver's methods, while test_dev_config_update_u8() and siblings
change their argument pointed to by u8 *cfg or similar pointer.
To avoid deadlock on test_fw_mutex, the lock is dropped before calling
test_dev_config_update_u8() and re-acquired within test_dev_config_update_u8()
itself, but alas this creates a race condition.
Having two locks wouldn't assure a race-proof mutual exclusion.
This situation is best avoided by the introduction of a new, unlocked
function __test_dev_config_update_u8() which can be called from the locked
context and reducing test_dev_config_update_u8() to:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
mutex_lock(&test_fw_mutex);
ret = __test_dev_config_update_u8(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
}
doing the locking and calling the unlocked primitive, which enables both
locked and unlocked versions without duplication of code.
Fixes: c92316bf8e948 ("test_firmware: add batched firmware tests")
Cc: Luis R. Rodriguez <mcgrof(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Russ Weight <russell.h.weight(a)intel.com>
Cc: Takashi Iwai <tiwai(a)suse.de>
Cc: Tianfei Zhang <tianfei.zhang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v5.4, 4.19, 4.14
Suggested-by: Dan Carpenter <error27(a)gmail.com>
Link: https://lore.kernel.org/r/20230509084746.48259-1-mirsad.todorovac@alu.unizg…
Signed-off-by: Mirsad Todorovac <mirsad.todorovac(a)alu.unizg.hr>
[ This is the patch to fix the racing condition in locking for the 5.4, ]
[ 4.19 and 4.14 stable branches. Not all the fixes from the upstream ]
[ commit apply, but those which do are verbatim equal to those in the ]
[ upstream commit. ]
---
v3:
minor bug fixes in the commit description. no change to the code.
5.4, 4.19 and 4.14 passed build, 5.4 and 4.19 passed kselftest.
unable to boot 4.14, should work (no changes to lib/test_firmware.c).
v2:
bundled locking and ENOSPC patches together.
tested on 5.4 and 4.19 stable.
lib/test_firmware.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 38553944e967..92d7195d5b5b 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -301,16 +301,26 @@ static ssize_t config_test_show_str(char *dst,
return len;
}
-static int test_dev_config_update_bool(const char *buf, size_t size,
- bool *cfg)
+static inline int __test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
{
int ret;
- mutex_lock(&test_fw_mutex);
if (strtobool(buf, cfg) < 0)
ret = -EINVAL;
else
ret = size;
+
+ return ret;
+}
+
+static int test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_bool(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
@@ -340,7 +350,7 @@ static ssize_t test_dev_config_show_int(char *buf, int cfg)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
-static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+static inline int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
long new;
@@ -352,14 +362,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
if (new > U8_MAX)
return -EINVAL;
- mutex_lock(&test_fw_mutex);
*(u8 *)cfg = new;
- mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
+static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_u8(buf, size, cfg);
+ mutex_unlock(&test_fw_mutex);
+
+ return ret;
+}
+
static ssize_t test_dev_config_show_u8(char *buf, u8 cfg)
{
u8 val;
@@ -392,10 +411,10 @@ static ssize_t config_num_requests_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_u8(buf, count,
- &test_fw_config->num_requests);
+ rc = __test_dev_config_update_u8(buf, count,
+ &test_fw_config->num_requests);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
--
2.34.1
[ Upstream commit 4acfe3dfde685a5a9eaec5555351918e2d7266a1 ]
Dan Carpenter spotted a race condition in a couple of situations like
these in the test_firmware driver:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
u8 val;
int ret;
ret = kstrtou8(buf, 10, &val);
if (ret)
return ret;
mutex_lock(&test_fw_mutex);
*(u8 *)cfg = val;
mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
static ssize_t config_num_requests_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int rc;
mutex_lock(&test_fw_mutex);
if (test_fw_config->reqs) {
pr_err("Must call release_all_firmware prior to changing config\n");
rc = -EINVAL;
mutex_unlock(&test_fw_mutex);
goto out;
}
mutex_unlock(&test_fw_mutex);
// NOTE: HERE is the race!!! Function can be preempted!
// test_fw_config->reqs can change between the release of
// the lock about and acquire of the lock in the
// test_dev_config_update_u8()
rc = test_dev_config_update_u8(buf, count,
&test_fw_config->num_requests);
out:
return rc;
}
static ssize_t config_read_fw_idx_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
return test_dev_config_update_u8(buf, count,
&test_fw_config->read_fw_idx);
}
The function test_dev_config_update_u8() is called from both the locked
and the unlocked context, function config_num_requests_store() and
config_read_fw_idx_store() which can both be called asynchronously as
they are driver's methods, while test_dev_config_update_u8() and siblings
change their argument pointed to by u8 *cfg or similar pointer.
To avoid deadlock on test_fw_mutex, the lock is dropped before calling
test_dev_config_update_u8() and re-acquired within test_dev_config_update_u8()
itself, but alas this creates a race condition.
Having two locks wouldn't assure a race-proof mutual exclusion.
This situation is best avoided by the introduction of a new, unlocked
function __test_dev_config_update_u8() which can be called from the locked
context and reducing test_dev_config_update_u8() to:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
mutex_lock(&test_fw_mutex);
ret = __test_dev_config_update_u8(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
}
doing the locking and calling the unlocked primitive, which enables both
locked and unlocked versions without duplication of code.
Fixes: c92316bf8e948 ("test_firmware: add batched firmware tests")
Cc: Luis R. Rodriguez <mcgrof(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Russ Weight <russell.h.weight(a)intel.com>
Cc: Takashi Iwai <tiwai(a)suse.de>
Cc: Tianfei Zhang <tianfei.zhang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v5.4, 4.19, 4.14
Suggested-by: Dan Carpenter <error27(a)gmail.com>
Link: https://lore.kernel.org/r/20230509084746.48259-1-mirsad.todorovac@alu.unizg…
Signed-off-by: Mirsad Todorovac <mirsad.todorovac(a)alu.unizg.hr>
[ This is the patch to fix the racing condition in locking for the 5.4, ]
[ 4.19 and 4.14 stable branches. Not all the fixes from the upstream ]
[ commit apply, but those which do are verbatim equal to those in the ]
[ upstream commit. ]
---
v4:
minor versioning clarifications for the patchwork. no changes to the commit.
v3:
fixed a minor typo. no change to commit.
v2:
tested on 5.4 stable build.
lib/test_firmware.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 38553944e967..92d7195d5b5b 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -301,16 +301,26 @@ static ssize_t config_test_show_str(char *dst,
return len;
}
-static int test_dev_config_update_bool(const char *buf, size_t size,
- bool *cfg)
+static inline int __test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
{
int ret;
- mutex_lock(&test_fw_mutex);
if (strtobool(buf, cfg) < 0)
ret = -EINVAL;
else
ret = size;
+
+ return ret;
+}
+
+static int test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_bool(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
@@ -340,7 +350,7 @@ static ssize_t test_dev_config_show_int(char *buf, int cfg)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
-static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+static inline int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
long new;
@@ -352,14 +362,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
if (new > U8_MAX)
return -EINVAL;
- mutex_lock(&test_fw_mutex);
*(u8 *)cfg = new;
- mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
+static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_u8(buf, size, cfg);
+ mutex_unlock(&test_fw_mutex);
+
+ return ret;
+}
+
static ssize_t test_dev_config_show_u8(char *buf, u8 cfg)
{
u8 val;
@@ -392,10 +411,10 @@ static ssize_t config_num_requests_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_u8(buf, count,
- &test_fw_config->num_requests);
+ rc = __test_dev_config_update_u8(buf, count,
+ &test_fw_config->num_requests);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
--
2.34.1
[ commit be37bed754ed90b2655382f93f9724b3c1aae847 upstream ]
Dan Carpenter spotted that test_fw_config->reqs will be leaked if
trigger_batched_requests_store() is called two or more times.
The same appears with trigger_batched_requests_async_store().
This bug wasn't triggered by the tests, but observed by Dan's visual
inspection of the code.
The recommended workaround was to return -EBUSY if test_fw_config->reqs
is already allocated.
Fixes: c92316bf8e94 ("test_firmware: add batched firmware tests")
Cc: Luis Chamberlain <mcgrof(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Russ Weight <russell.h.weight(a)intel.com>
Cc: Tianfei Zhang <tianfei.zhang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v4.19
Suggested-by: Dan Carpenter <error27(a)gmail.com>
Suggested-by: Takashi Iwai <tiwai(a)suse.de>
Link: https://lore.kernel.org/r/20230509084746.48259-2-mirsad.todorovac@alu.unizg…
Signed-off-by: Mirsad Todorovac <mirsad.todorovac(a)alu.unizg.hr>
[ This is a backport to v4.19 stable branch without a change in code from the 5.4+ patch ]
---
v2:
no changes to commit. minor clarifications with versioning for the patchwork.
v1:
patch sumbmitted verbatim from the 5.4+ branch to 4.19
lib/test_firmware.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index f4cc874021da..e4688821eab8 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -618,6 +618,11 @@ static ssize_t trigger_batched_requests_store(struct device *dev,
mutex_lock(&test_fw_mutex);
+ if (test_fw_config->reqs) {
+ rc = -EBUSY;
+ goto out_bail;
+ }
+
test_fw_config->reqs =
vzalloc(array3_size(sizeof(struct test_batched_req),
test_fw_config->num_requests, 2));
@@ -721,6 +726,11 @@ ssize_t trigger_batched_requests_async_store(struct device *dev,
mutex_lock(&test_fw_mutex);
+ if (test_fw_config->reqs) {
+ rc = -EBUSY;
+ goto out_bail;
+ }
+
test_fw_config->reqs =
vzalloc(array3_size(sizeof(struct test_batched_req),
test_fw_config->num_requests, 2));
--
2.34.1
During recent vma locking patch reviews Linus and Jann Horn noted a number
of issues with vma locking and suggested improvements:
1. walk_page_range() does not have ability to write-lock a vma during the
walk when it's done under mmap_write_lock. For example s390_reset_cmma().
2. Vma locking is hidden inside vm_flags modifiers and is hard to follow.
Suggestion is to change vm_flags_reset{_once} to assert that vma is
write-locked and require an explicit locking.
3. Same issue with vma_prepare() hiding vma locking.
4. In userfaultfd vm_flags are modified after vma->vm_userfaultfd_ctx and
page faults can operate on a context while it's changed.
5. do_brk_flags() and __install_special_mapping() not locking a newly
created vma before adding it into the mm. While not strictly a problem,
this is fragile if vma is modified after insertion, as in the
mmap_region() case which was recently fixed. Suggestion is to always lock
a new vma before inserting it and making it visible to page faults.
6. vma_assert_write_locked() for CONFIG_PER_VMA_LOCK=n would benefit from
being mmap_assert_write_locked() instead of no-op and then any place which
operates on a vma and calls mmap_assert_write_locked() can be converted
into vma_assert_write_locked().
I CC'ed stable only on the first patch because others are cleanups and the
bug in userfaultfd does not affect stable (lock_vma_under_rcu prevents
uffds from being handled under vma lock protection). However I would be
happy if the whole series is merged into stable 6.4 since it makes vma
locking more maintainable.
The patches apply cleanly over Linus' ToT and will conflict when applied
over mm-unstable due to missing [1]. The conflict can be easily resolved
by ignoring conflicting deletions but probably simpler to take [1] into
mm-unstable and avoid later conflict.
[1] commit 6c21e066f925 ("mm/mempolicy: Take VMA lock before replacing policy")
Changes since v2:
- removed vma locking from hfi1_file_mmap(), per Linus
- moved vma locking out of dup_anon_vma(), per Liam
- added Liam's Reviewed-by
Suren Baghdasaryan (6):
mm: enable page walking API to lock vmas during the walk
mm: for !CONFIG_PER_VMA_LOCK equate write lock assertion for vma and
mmap
mm: replace mmap with vma write lock assertions when operating on a
vma
mm: lock vma explicitly before doing vm_flags_reset and
vm_flags_reset_once
mm: always lock new vma before inserting into vma tree
mm: move vma locking out of vma_prepare and dup_anon_vma
arch/powerpc/kvm/book3s_hv_uvmem.c | 1 +
arch/powerpc/mm/book3s64/subpage_prot.c | 1 +
arch/riscv/mm/pageattr.c | 1 +
arch/s390/mm/gmap.c | 5 ++++
fs/proc/task_mmu.c | 5 ++++
fs/userfaultfd.c | 6 +++++
include/linux/mm.h | 13 ++++++---
include/linux/pagewalk.h | 11 ++++++++
mm/damon/vaddr.c | 2 ++
mm/hmm.c | 1 +
mm/hugetlb.c | 2 +-
mm/khugepaged.c | 5 ++--
mm/ksm.c | 25 ++++++++++-------
mm/madvise.c | 8 +++---
mm/memcontrol.c | 2 ++
mm/memory-failure.c | 1 +
mm/memory.c | 2 +-
mm/mempolicy.c | 22 +++++++++------
mm/migrate_device.c | 1 +
mm/mincore.c | 1 +
mm/mlock.c | 4 ++-
mm/mmap.c | 32 ++++++++++++++--------
mm/mprotect.c | 2 ++
mm/pagewalk.c | 36 ++++++++++++++++++++++---
mm/vmscan.c | 1 +
25 files changed, 147 insertions(+), 43 deletions(-)
--
2.41.0.585.gd2178a4bd4-goog
Commit 3f9ffce5765d ("drm/i915: Do panel VBT init early if the VBT
declares an explicit panel type") started using -1 as the value for
unset panel_type. It gets initialized in intel_panel_init_alloc(), but
the SDVO code never calls it.
Call intel_panel_init_alloc() to initialize the panel, including the
panel_type.
Reported-by: Tomi Leppänen <tomi(a)tomin.site>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8896
Fixes: 3f9ffce5765d ("drm/i915: Do panel VBT init early if the VBT declares an explicit panel type")
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> # v6.1+
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
---
drivers/gpu/drm/i915/display/intel_sdvo.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index 8298a86d1334..b4faf97936b9 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -2752,7 +2752,7 @@ static struct intel_sdvo_connector *intel_sdvo_connector_alloc(void)
__drm_atomic_helper_connector_reset(&sdvo_connector->base.base,
&conn_state->base.base);
- INIT_LIST_HEAD(&sdvo_connector->base.panel.fixed_modes);
+ intel_panel_init_alloc(&sdvo_connector->base);
return sdvo_connector;
}
--
2.39.2
This is a note to let you know that I've just added the patch titled
iio: adc: meson: fix core clock enable/disable moment
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 09738ccbc4148c62d6c8c4644ff4a099d57f49ad Mon Sep 17 00:00:00 2001
From: George Stark <gnstark(a)sberdevices.ru>
Date: Fri, 21 Jul 2023 13:23:08 +0300
Subject: iio: adc: meson: fix core clock enable/disable moment
Enable core clock at probe stage and disable it at remove stage.
Core clock is responsible for turning on/off the entire SoC module so
it should be on before the first module register is touched and be off
at very last moment.
Fixes: 3adbf3427330 ("iio: adc: add a driver for the SAR ADC found in Amlogic Meson SoCs")
Signed-off-by: George Stark <gnstark(a)sberdevices.ru>
Link: https://lore.kernel.org/r/20230721102413.255726-2-gnstark@sberdevices.ru
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/meson_saradc.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c
index af6bfcc19075..eb78a6f17fd0 100644
--- a/drivers/iio/adc/meson_saradc.c
+++ b/drivers/iio/adc/meson_saradc.c
@@ -916,12 +916,6 @@ static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev)
goto err_vref;
}
- ret = clk_prepare_enable(priv->core_clk);
- if (ret) {
- dev_err(dev, "failed to enable core clk\n");
- goto err_core_clk;
- }
-
regval = FIELD_PREP(MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, 1);
regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG0,
MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, regval);
@@ -948,8 +942,6 @@ static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev)
regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3,
MESON_SAR_ADC_REG3_ADC_EN, 0);
meson_sar_adc_set_bandgap(indio_dev, false);
- clk_disable_unprepare(priv->core_clk);
-err_core_clk:
regulator_disable(priv->vref);
err_vref:
meson_sar_adc_unlock(indio_dev);
@@ -977,8 +969,6 @@ static void meson_sar_adc_hw_disable(struct iio_dev *indio_dev)
meson_sar_adc_set_bandgap(indio_dev, false);
- clk_disable_unprepare(priv->core_clk);
-
regulator_disable(priv->vref);
if (!ret)
@@ -1211,7 +1201,7 @@ static int meson_sar_adc_probe(struct platform_device *pdev)
if (IS_ERR(priv->clkin))
return dev_err_probe(dev, PTR_ERR(priv->clkin), "failed to get clkin\n");
- priv->core_clk = devm_clk_get(dev, "core");
+ priv->core_clk = devm_clk_get_enabled(dev, "core");
if (IS_ERR(priv->core_clk))
return dev_err_probe(dev, PTR_ERR(priv->core_clk), "failed to get core clk\n");
@@ -1294,15 +1284,26 @@ static int meson_sar_adc_remove(struct platform_device *pdev)
static int meson_sar_adc_suspend(struct device *dev)
{
struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
meson_sar_adc_hw_disable(indio_dev);
+ clk_disable_unprepare(priv->core_clk);
+
return 0;
}
static int meson_sar_adc_resume(struct device *dev)
{
struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
+ int ret;
+
+ ret = clk_prepare_enable(priv->core_clk);
+ if (ret) {
+ dev_err(dev, "failed to enable core clk\n");
+ return ret;
+ }
return meson_sar_adc_hw_enable(indio_dev);
}
--
2.41.0
This is a note to let you know that I've just added the patch titled
iio: frequency: admv1013: propagate errors from
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 507397d19b5a296aa339f7a1bd16284f668a1906 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Tue, 18 Jul 2023 10:02:18 +0300
Subject: iio: frequency: admv1013: propagate errors from
regulator_get_voltage()
The regulator_get_voltage() function returns negative error codes.
This function saves it to an unsigned int and then does some range
checking and, since the error code falls outside the correct range,
it returns -EINVAL.
Beyond the messiness, this is bad because the regulator_get_voltage()
function can return -EPROBE_DEFER and it's important to propagate that
back properly so it can be handled.
Fixes: da35a7b526d9 ("iio: frequency: admv1013: add support for ADMV1013")
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Link: https://lore.kernel.org/r/ce75aac3-2aba-4435-8419-02e59fdd862b@moroto.mount…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/frequency/admv1013.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c
index 9bf8337806fc..8c8e0bbfc99f 100644
--- a/drivers/iio/frequency/admv1013.c
+++ b/drivers/iio/frequency/admv1013.c
@@ -344,9 +344,12 @@ static int admv1013_update_quad_filters(struct admv1013_state *st)
static int admv1013_update_mixer_vgate(struct admv1013_state *st)
{
- unsigned int vcm, mixer_vgate;
+ unsigned int mixer_vgate;
+ int vcm;
vcm = regulator_get_voltage(st->reg);
+ if (vcm < 0)
+ return vcm;
if (vcm < 1800000)
mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100;
--
2.41.0
This is a note to let you know that I've just added the patch titled
iio: adc: ina2xx: avoid NULL pointer dereference on OF device match
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From a41e19cc0d6b6a445a4133170b90271e4a2553dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alvin=20=C5=A0ipraga?= <alsi(a)bang-olufsen.dk>
Date: Mon, 19 Jun 2023 16:12:39 +0200
Subject: iio: adc: ina2xx: avoid NULL pointer dereference on OF device match
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The affected lines were resulting in a NULL pointer dereference on our
platform because the device tree contained the following list of
compatible strings:
power-sensor@40 {
compatible = "ti,ina232", "ti,ina231";
...
};
Since the driver doesn't declare a compatible string "ti,ina232", the OF
matching succeeds on "ti,ina231". But the I2C device ID info is
populated via the first compatible string, cf. modalias population in
of_i2c_get_board_info(). Since there is no "ina232" entry in the legacy
I2C device ID table either, the struct i2c_device_id *id pointer in the
probe function is NULL.
Fix this by using the already populated type variable instead, which
points to the proper driver data. Since the name is also wanted, add a
generic one to the ina2xx_config table.
Signed-off-by: Alvin Šipraga <alsi(a)bang-olufsen.dk>
Fixes: c43a102e67db ("iio: ina2xx: add support for TI INA2xx Power Monitors")
Link: https://lore.kernel.org/r/20230619141239.2257392-1-alvin@pqrs.dk
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ina2xx-adc.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c
index 213526c1592f..aea83f369437 100644
--- a/drivers/iio/adc/ina2xx-adc.c
+++ b/drivers/iio/adc/ina2xx-adc.c
@@ -124,6 +124,7 @@ static const struct regmap_config ina2xx_regmap_config = {
enum ina2xx_ids { ina219, ina226 };
struct ina2xx_config {
+ const char *name;
u16 config_default;
int calibration_value;
int shunt_voltage_lsb; /* nV */
@@ -155,6 +156,7 @@ struct ina2xx_chip_info {
static const struct ina2xx_config ina2xx_config[] = {
[ina219] = {
+ .name = "ina219",
.config_default = INA219_CONFIG_DEFAULT,
.calibration_value = 4096,
.shunt_voltage_lsb = 10000,
@@ -164,6 +166,7 @@ static const struct ina2xx_config ina2xx_config[] = {
.chip_id = ina219,
},
[ina226] = {
+ .name = "ina226",
.config_default = INA226_CONFIG_DEFAULT,
.calibration_value = 2048,
.shunt_voltage_lsb = 2500,
@@ -996,7 +999,7 @@ static int ina2xx_probe(struct i2c_client *client)
/* Patch the current config register with default. */
val = chip->config->config_default;
- if (id->driver_data == ina226) {
+ if (type == ina226) {
ina226_set_average(chip, INA226_DEFAULT_AVG, &val);
ina226_set_int_time_vbus(chip, INA226_DEFAULT_IT, &val);
ina226_set_int_time_vshunt(chip, INA226_DEFAULT_IT, &val);
@@ -1015,7 +1018,7 @@ static int ina2xx_probe(struct i2c_client *client)
}
indio_dev->modes = INDIO_DIRECT_MODE;
- if (id->driver_data == ina226) {
+ if (type == ina226) {
indio_dev->channels = ina226_channels;
indio_dev->num_channels = ARRAY_SIZE(ina226_channels);
indio_dev->info = &ina226_info;
@@ -1024,7 +1027,7 @@ static int ina2xx_probe(struct i2c_client *client)
indio_dev->num_channels = ARRAY_SIZE(ina219_channels);
indio_dev->info = &ina219_info;
}
- indio_dev->name = id->name;
+ indio_dev->name = id ? id->name : chip->config->name;
ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev,
&ina2xx_setup_ops);
--
2.41.0
This is a note to let you know that I've just added the patch titled
usb-storage: alauda: Fix uninit-value in alauda_check_media()
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From a6ff6e7a9dd69364547751db0f626a10a6d628d2 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Wed, 2 Aug 2023 13:49:02 -0400
Subject: usb-storage: alauda: Fix uninit-value in alauda_check_media()
Syzbot got KMSAN to complain about access to an uninitialized value in
the alauda subdriver of usb-storage:
BUG: KMSAN: uninit-value in alauda_transport+0x462/0x57f0
drivers/usb/storage/alauda.c:1137
CPU: 0 PID: 12279 Comm: usb-storage Not tainted 5.3.0-rc7+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x191/0x1f0 lib/dump_stack.c:113
kmsan_report+0x13a/0x2b0 mm/kmsan/kmsan_report.c:108
__msan_warning+0x73/0xe0 mm/kmsan/kmsan_instr.c:250
alauda_check_media+0x344/0x3310 drivers/usb/storage/alauda.c:460
The problem is that alauda_check_media() doesn't verify that its USB
transfer succeeded before trying to use the received data. What
should happen if the transfer fails isn't entirely clear, but a
reasonably conservative approach is to pretend that no media is
present.
A similar problem exists in a usb_stor_dbg() call in
alauda_get_media_status(). In this case, when an error occurs the
call is redundant, because usb_stor_ctrl_transfer() already will print
a debugging message.
Finally, unrelated to the uninitialized memory access, is the fact
that alauda_check_media() performs DMA to a buffer on the stack.
Fortunately usb-storage provides a general purpose DMA-able buffer for
uses like this. We'll use it instead.
Reported-and-tested-by: syzbot+e7d46eb426883fb97efd(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/0000000000007d25ff059457342d@google.com/T/
Suggested-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Fixes: e80b0fade09e ("[PATCH] USB Storage: add alauda support")
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/693d5d5e-f09b-42d0-8ed9-1f96cd30bcce@rowland.harv…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/storage/alauda.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c
index 5e912dd29b4c..115f05a6201a 100644
--- a/drivers/usb/storage/alauda.c
+++ b/drivers/usb/storage/alauda.c
@@ -318,7 +318,8 @@ static int alauda_get_media_status(struct us_data *us, unsigned char *data)
rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe,
command, 0xc0, 0, 1, data, 2);
- usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]);
+ if (rc == USB_STOR_XFER_GOOD)
+ usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]);
return rc;
}
@@ -454,9 +455,14 @@ static int alauda_init_media(struct us_data *us)
static int alauda_check_media(struct us_data *us)
{
struct alauda_info *info = (struct alauda_info *) us->extra;
- unsigned char status[2];
+ unsigned char *status = us->iobuf;
+ int rc;
- alauda_get_media_status(us, status);
+ rc = alauda_get_media_status(us, status);
+ if (rc != USB_STOR_XFER_GOOD) {
+ status[0] = 0xF0; /* Pretend there's no media */
+ status[1] = 0;
+ }
/* Check for no media or door open */
if ((status[0] & 0x80) || ((status[0] & 0x1F) == 0x10)
--
2.41.0
This is a note to let you know that I've just added the patch titled
usb: common: usb-conn-gpio: Prevent bailing out if initial role is
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 8e21a620c7e6e00347ade1a6ed4967b359eada5a Mon Sep 17 00:00:00 2001
From: Prashanth K <quic_prashk(a)quicinc.com>
Date: Tue, 1 Aug 2023 14:33:52 +0530
Subject: usb: common: usb-conn-gpio: Prevent bailing out if initial role is
none
Currently if we bootup a device without cable connected, then
usb-conn-gpio won't call set_role() because last_role is same
as current role. This happens since last_role gets initialised
to zero during the probe.
To avoid this, add a new flag initial_detection into struct
usb_conn_info, which prevents bailing out during initial
detection.
Cc: <stable(a)vger.kernel.org> # 5.4
Fixes: 4602f3bff266 ("usb: common: add USB GPIO based connection detection driver")
Signed-off-by: Prashanth K <quic_prashk(a)quicinc.com>
Tested-by: AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/1690880632-12588-1-git-send-email-quic_prashk@qui…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/common/usb-conn-gpio.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/common/usb-conn-gpio.c b/drivers/usb/common/usb-conn-gpio.c
index 766005d20bae..501e8bc9738e 100644
--- a/drivers/usb/common/usb-conn-gpio.c
+++ b/drivers/usb/common/usb-conn-gpio.c
@@ -42,6 +42,7 @@ struct usb_conn_info {
struct power_supply_desc desc;
struct power_supply *charger;
+ bool initial_detection;
};
/*
@@ -86,11 +87,13 @@ static void usb_conn_detect_cable(struct work_struct *work)
dev_dbg(info->dev, "role %s -> %s, gpios: id %d, vbus %d\n",
usb_role_string(info->last_role), usb_role_string(role), id, vbus);
- if (info->last_role == role) {
+ if (!info->initial_detection && info->last_role == role) {
dev_warn(info->dev, "repeated role: %s\n", usb_role_string(role));
return;
}
+ info->initial_detection = false;
+
if (info->last_role == USB_ROLE_HOST && info->vbus)
regulator_disable(info->vbus);
@@ -258,6 +261,7 @@ static int usb_conn_probe(struct platform_device *pdev)
device_set_wakeup_capable(&pdev->dev, true);
/* Perform initial detection */
+ info->initial_detection = true;
usb_conn_queue_dwork(info, 0);
return 0;
--
2.41.0
This is a note to let you know that I've just added the patch titled
USB: Gadget: core: Help prevent panic during UVC unconfigure
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 65dadb2beeb7360232b09ebc4585b54475dfee06 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern(a)rowland.harvard.edu>
Date: Sat, 29 Jul 2023 10:59:38 -0400
Subject: USB: Gadget: core: Help prevent panic during UVC unconfigure
Avichal Rakesh reported a kernel panic that occurred when the UVC
gadget driver was removed from a gadget's configuration. The panic
involves a somewhat complicated interaction between the kernel driver
and a userspace component (as described in the Link tag below), but
the analysis did make one thing clear: The Gadget core should
accomodate gadget drivers calling usb_gadget_deactivate() as part of
their unbind procedure.
Currently this doesn't work. gadget_unbind_driver() calls
driver->unbind() while holding the udc->connect_lock mutex, and
usb_gadget_deactivate() attempts to acquire that mutex, which will
result in a deadlock.
The simple fix is for gadget_unbind_driver() to release the mutex when
invoking the ->unbind() callback. There is no particular reason for
it to be holding the mutex at that time, and the mutex isn't held
while the ->bind() callback is invoked. So we'll drop the mutex
before performing the unbind callback and reacquire it afterward.
We'll also add a couple of comments to usb_gadget_activate() and
usb_gadget_deactivate(). Because they run in process context they
must not be called from a gadget driver's ->disconnect() callback,
which (according to the kerneldoc for struct usb_gadget_driver in
include/linux/usb/gadget.h) may run in interrupt context. This may
help prevent similar bugs from arising in the future.
Reported-and-tested-by: Avichal Rakesh <arakesh(a)google.com>
Signed-off-by: Alan Stern <stern(a)rowland.harvard.edu>
Fixes: 286d9975a838 ("usb: gadget: udc: core: Prevent soft_connect_store() race")
Link: https://lore.kernel.org/linux-usb/4d7aa3f4-22d9-9f5a-3d70-1bd7148ff4ba@goog…
Cc: Badhri Jagan Sridharan <badhri(a)google.com>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/48b2f1f1-0639-46bf-bbfc-98cb05a24914@rowland.harv…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/gadget/udc/core.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index cd58f2a4e7f3..7d49d8a0b00c 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -822,6 +822,9 @@ EXPORT_SYMBOL_GPL(usb_gadget_disconnect);
* usb_gadget_activate() is called. For example, user mode components may
* need to be activated before the system can talk to hosts.
*
+ * This routine may sleep; it must not be called in interrupt context
+ * (such as from within a gadget driver's disconnect() callback).
+ *
* Returns zero on success, else negative errno.
*/
int usb_gadget_deactivate(struct usb_gadget *gadget)
@@ -860,6 +863,8 @@ EXPORT_SYMBOL_GPL(usb_gadget_deactivate);
* This routine activates gadget which was previously deactivated with
* usb_gadget_deactivate() call. It calls usb_gadget_connect() if needed.
*
+ * This routine may sleep; it must not be called in interrupt context.
+ *
* Returns zero on success, else negative errno.
*/
int usb_gadget_activate(struct usb_gadget *gadget)
@@ -1638,7 +1643,11 @@ static void gadget_unbind_driver(struct device *dev)
usb_gadget_disable_async_callbacks(udc);
if (gadget->irq)
synchronize_irq(gadget->irq);
+ mutex_unlock(&udc->connect_lock);
+
udc->driver->unbind(gadget);
+
+ mutex_lock(&udc->connect_lock);
usb_gadget_udc_stop_locked(udc);
mutex_unlock(&udc->connect_lock);
--
2.41.0
This is a note to let you know that I've just added the patch titled
usb: typec: tcpm: Fix response to vsafe0V event
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 4270d2b4845e820b274702bfc2a7140f69e4d19d Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <badhri(a)google.com>
Date: Wed, 12 Jul 2023 08:57:22 +0000
Subject: usb: typec: tcpm: Fix response to vsafe0V event
Do not transition to SNK_UNATTACHED state when receiving vsafe0v event
while in SNK_HARD_RESET_WAIT_VBUS. Ignore VBUS off events as well as
in some platforms VBUS off can be signalled more than once.
[143515.364753] Requesting mux state 1, usb-role 2, orientation 2
[143515.365520] pending state change SNK_HARD_RESET_SINK_OFF -> SNK_HARD_RESET_SINK_ON @ 650 ms [rev3 HARD_RESET]
[143515.632281] CC1: 0 -> 0, CC2: 3 -> 0 [state SNK_HARD_RESET_SINK_OFF, polarity 1, disconnected]
[143515.637214] VBUS on
[143515.664985] VBUS off
[143515.664992] state change SNK_HARD_RESET_SINK_OFF -> SNK_HARD_RESET_WAIT_VBUS [rev3 HARD_RESET]
[143515.665564] VBUS VSAFE0V
[143515.665566] state change SNK_HARD_RESET_WAIT_VBUS -> SNK_UNATTACHED [rev3 HARD_RESET]
Fixes: 28b43d3d746b ("usb: typec: tcpm: Introduce vsafe0v for vbus")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Badhri Jagan Sridharan <badhri(a)google.com>
Acked-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/20230712085722.1414743-1-badhri@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/typec/tcpm/tcpm.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 829d75ebab42..cc1d83926497 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5349,6 +5349,10 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
/* Do nothing, vbus drop expected */
break;
+ case SNK_HARD_RESET_WAIT_VBUS:
+ /* Do nothing, its OK to receive vbus off events */
+ break;
+
default:
if (port->pwr_role == TYPEC_SINK && port->attached)
tcpm_set_state(port, SNK_UNATTACHED, tcpm_wait_for_discharge(port));
@@ -5395,6 +5399,9 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port)
case SNK_DEBOUNCED:
/*Do nothing, still waiting for VSAFE5V for connect */
break;
+ case SNK_HARD_RESET_WAIT_VBUS:
+ /* Do nothing, its OK to receive vbus off events */
+ break;
default:
if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled)
tcpm_set_state(port, SNK_UNATTACHED, 0);
--
2.41.0
When connecting to some DisplayPort partners, the initial status update
after entering DisplayPort Alt Mode notifies that the DFP_D/UFP_D is not in
the connected state. This leads to sending a configure message that keeps
the device in USB mode. The port partner then sets DFP_D/UFP_D to the
connected state and HPD to high in the same Attention message. Currently,
the HPD signal is dropped in order to handle configuration.
This patch saves changes to the HPD signal when the device chooses to
configure during dp_altmode_status_update, and invokes sysfs_notify if
necessary for HPD after configuring.
Fixes: 0e3bb7d6894d ("usb: typec: Add driver for DisplayPort alternate mode")
Cc: stable(a)vger.kernel.org
Signed-off-by: RD Babiera <rdbabiera(a)google.com>
---
drivers/usb/typec/altmodes/displayport.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
index 66de880b28d0..cdf8261e22db 100644
--- a/drivers/usb/typec/altmodes/displayport.c
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -60,6 +60,7 @@ struct dp_altmode {
enum dp_state state;
bool hpd;
+ bool pending_hpd;
struct mutex lock; /* device lock */
struct work_struct work;
@@ -144,8 +145,13 @@ static int dp_altmode_status_update(struct dp_altmode *dp)
dp->state = DP_STATE_EXIT;
} else if (!(con & DP_CONF_CURRENTLY(dp->data.conf))) {
ret = dp_altmode_configure(dp, con);
- if (!ret)
+ if (!ret) {
dp->state = DP_STATE_CONFIGURE;
+ if (dp->hpd != hpd) {
+ dp->hpd = hpd;
+ dp->pending_hpd = true;
+ }
+ }
} else {
if (dp->hpd != hpd) {
drm_connector_oob_hotplug_event(dp->connector_fwnode);
@@ -161,6 +167,16 @@ static int dp_altmode_configured(struct dp_altmode *dp)
{
sysfs_notify(&dp->alt->dev.kobj, "displayport", "configuration");
sysfs_notify(&dp->alt->dev.kobj, "displayport", "pin_assignment");
+ /*
+ * If the DFP_D/UFP_D sends a change in HPD when first notifying the
+ * DisplayPort driver that it is connected, then we wait until
+ * configuration is complete to signal HPD.
+ */
+ if (dp->pending_hpd) {
+ drm_connector_oob_hotplug_event(dp->connector_fwnode);
+ sysfs_notify(&dp->alt->dev.kobj, "displayport", "hpd");
+ dp->pending_hpd = false;
+ }
return dp_altmode_notify(dp);
}
base-commit: fdf0eaf11452d72945af31804e2a1048ee1b574c
--
2.41.0.487.g6d72f3e995-goog
commit e11ec2b868af2b351c6c1e2e50eb711cc5423a10 upstream.
Last year, the code that manages GSI channel transactions switched
from using spinlock-protected linked lists to using indexes into the
ring buffer used for a channel. Recently, Google reported seeing
transaction reference count underflows occasionally during shutdown.
Doug Anderson found a way to reproduce the issue reliably, and
bisected the issue to the commit that eliminated the linked lists
and the lock. The root cause was ultimately determined to be
related to unused transactions being committed as part of the modem
shutdown cleanup activity. Unused transactions are not normally
expected (except in error cases).
The modem uses some ranges of IPA-resident memory, and whenever it
shuts down we zero those ranges. In ipa_filter_reset_table() a
transaction is allocated to zero modem filter table entries. If
hashing is not supported, hashed table memory should not be zeroed.
But currently nothing prevents that, and the result is an unused
transaction. Something similar occurs when we zero routing table
entries for the modem.
By preventing any attempt to clear hashed tables when hashing is not
supported, the reference count underflow is avoided in this case.
Note that there likely remains an issue with properly freeing unused
transactions (if they occur due to errors). This patch addresses
only the underflows that Google originally reported.
Fixes: d338ae28d8a8 ("net: ipa: kill all other transaction lists")
Cc: <stable(a)vger.kernel.org> # 6.1.x
Tested-by: Douglas Anderson <dianders(a)chromium.org>
Signed-off-by: Alex Elder <elder(a)linaro.org>
---
drivers/net/ipa/ipa_table.c | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index 510ff2dc8999a..cd81dd916c29e 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -311,16 +311,15 @@ static int ipa_filter_reset(struct ipa *ipa, bool modem)
if (ret)
return ret;
- ret = ipa_filter_reset_table(ipa, IPA_MEM_V4_FILTER_HASHED, modem);
- if (ret)
- return ret;
-
ret = ipa_filter_reset_table(ipa, IPA_MEM_V6_FILTER, modem);
+ if (ret || !ipa_table_hash_support(ipa))
+ return ret;
+
+ ret = ipa_filter_reset_table(ipa, IPA_MEM_V4_FILTER_HASHED, modem);
if (ret)
return ret;
- ret = ipa_filter_reset_table(ipa, IPA_MEM_V6_FILTER_HASHED, modem);
- return ret;
+ return ipa_filter_reset_table(ipa, IPA_MEM_V6_FILTER_HASHED, modem);
}
/* The AP routes and modem routes are each contiguous within the
@@ -329,11 +328,12 @@ static int ipa_filter_reset(struct ipa *ipa, bool modem)
* */
static int ipa_route_reset(struct ipa *ipa, bool modem)
{
+ bool hash_support = ipa_table_hash_support(ipa);
struct gsi_trans *trans;
u16 first;
u16 count;
- trans = ipa_cmd_trans_alloc(ipa, 4);
+ trans = ipa_cmd_trans_alloc(ipa, hash_support ? 4 : 2);
if (!trans) {
dev_err(&ipa->pdev->dev,
"no transaction for %s route reset\n",
@@ -350,12 +350,14 @@ static int ipa_route_reset(struct ipa *ipa, bool modem)
}
ipa_table_reset_add(trans, false, first, count, IPA_MEM_V4_ROUTE);
- ipa_table_reset_add(trans, false, first, count,
- IPA_MEM_V4_ROUTE_HASHED);
-
ipa_table_reset_add(trans, false, first, count, IPA_MEM_V6_ROUTE);
- ipa_table_reset_add(trans, false, first, count,
- IPA_MEM_V6_ROUTE_HASHED);
+
+ if (hash_support) {
+ ipa_table_reset_add(trans, false, first, count,
+ IPA_MEM_V4_ROUTE_HASHED);
+ ipa_table_reset_add(trans, false, first, count,
+ IPA_MEM_V6_ROUTE_HASHED);
+ }
gsi_trans_commit_wait(trans);
--
2.34.1
From: Wen Gong <wgong(a)codeaurora.org>
commit 0dc267b13f3a7e8424a898815dd357211b737330 upstream.
TKIP Michael MIC was not verified properly for PCIe cases since the
validation steps in ieee80211_rx_h_michael_mic_verify() in mac80211 did
not get fully executed due to unexpected flag values in
ieee80211_rx_status.
Fix this by setting the flags property to meet mac80211 expectations for
performing Michael MIC validation there. This fixes CVE-2020-26141. It
does the same as ath10k_htt_rx_proc_rx_ind_hl() for SDIO which passed
MIC verification case. This applies only to QCA6174/QCA9377 PCIe.
Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1
Cc: stable(a)vger.kernel.org
Signed-off-by: Wen Gong <wgong(a)codeaurora.org>
Signed-off-by: Jouni Malinen <jouni(a)codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.c3f1d42c6746.I795593fcaae941c47142…
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
Signed-off-by: Ashwin Dayanand Kamat <kashwindayan(a)vmware.com>
---
drivers/net/wireless/ath/ath10k/htt_rx.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 0a7551dc0f94..b8bba6d99dba 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1645,6 +1645,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
skb_queue_walk(amsdu, msdu) {
ath10k_htt_rx_h_csum_offload(msdu);
+
+ if (frag && !fill_crypt_header &&
+ enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA)
+ status->flag &= ~RX_FLAG_MMIC_STRIPPED;
+
ath10k_htt_rx_h_undecap(ar, msdu, status, first_hdr, enctype,
is_decrypted);
@@ -1662,6 +1667,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
hdr = (void *)msdu->data;
hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+
+ if (frag && !fill_crypt_header &&
+ enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA)
+ status->flag &= ~RX_FLAG_IV_STRIPPED &
+ ~RX_FLAG_MMIC_STRIPPED;
}
}
--
2.35.6
The patch below does not apply to the 6.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.4.y
git checkout FETCH_HEAD
git cherry-pick -x 657b5146955eba331e01b9a6ae89ce2e716ba306
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080129-surface-stench-5e24@gregkh' --subject-prefix 'PATCH 6.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 657b5146955eba331e01b9a6ae89ce2e716ba306 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Wed, 26 Jul 2023 23:41:03 +0200
Subject: [PATCH] mm: lock_vma_under_rcu() must check vma->anon_vma under vma
lock
lock_vma_under_rcu() tries to guarantee that __anon_vma_prepare() can't
be called in the VMA-locked page fault path by ensuring that
vma->anon_vma is set.
However, this check happens before the VMA is locked, which means a
concurrent move_vma() can concurrently call unlink_anon_vmas(), which
disassociates the VMA's anon_vma.
This means we can get UAF in the following scenario:
THREAD 1 THREAD 2
======== ========
<page fault>
lock_vma_under_rcu()
rcu_read_lock()
mas_walk()
check vma->anon_vma
mremap() syscall
move_vma()
vma_start_write()
unlink_anon_vmas()
<syscall end>
handle_mm_fault()
__handle_mm_fault()
handle_pte_fault()
do_pte_missing()
do_anonymous_page()
anon_vma_prepare()
__anon_vma_prepare()
find_mergeable_anon_vma()
mas_walk() [looks up VMA X]
munmap() syscall (deletes VMA X)
reusable_anon_vma() [called on freed VMA X]
This is a security bug if you can hit it, although an attacker would
have to win two races at once where the first race window is only a few
instructions wide.
This patch is based on some previous discussion with Linus Torvalds on
the security list.
Cc: stable(a)vger.kernel.org
Fixes: 5e31275cc997 ("mm: add per-VMA lock and helper functions to control it")
Signed-off-by: Jann Horn <jannh(a)google.com>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/memory.c b/mm/memory.c
index 01f39e8144ef..603b2f419948 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5393,27 +5393,28 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
if (!vma_is_anonymous(vma) && !vma_is_tcp(vma))
goto inval;
- /* find_mergeable_anon_vma uses adjacent vmas which are not locked */
- if (!vma->anon_vma && !vma_is_tcp(vma))
- goto inval;
-
if (!vma_start_read(vma))
goto inval;
+ /*
+ * find_mergeable_anon_vma uses adjacent vmas which are not locked.
+ * This check must happen after vma_start_read(); otherwise, a
+ * concurrent mremap() with MREMAP_DONTUNMAP could dissociate the VMA
+ * from its anon_vma.
+ */
+ if (unlikely(!vma->anon_vma && !vma_is_tcp(vma)))
+ goto inval_end_read;
+
/*
* Due to the possibility of userfault handler dropping mmap_lock, avoid
* it for now and fall back to page fault handling under mmap_lock.
*/
- if (userfaultfd_armed(vma)) {
- vma_end_read(vma);
- goto inval;
- }
+ if (userfaultfd_armed(vma))
+ goto inval_end_read;
/* Check since vm_start/vm_end might change before we lock the VMA */
- if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
- vma_end_read(vma);
- goto inval;
- }
+ if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+ goto inval_end_read;
/* Check if the VMA got isolated after we found it */
if (vma->detached) {
@@ -5425,6 +5426,9 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
rcu_read_unlock();
return vma;
+
+inval_end_read:
+ vma_end_read(vma);
inval:
rcu_read_unlock();
count_vm_vma_lock_event(VMA_LOCK_ABORT);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 7b72d661f1f2f950ab8c12de7e2bc48bdac8ed69
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023080153-turkey-reload-8fa7@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
7b72d661f1f2 ("io_uring: gate iowait schedule on having pending requests")
8a796565cec3 ("io_uring: Use io_schedule* in cqring wait")
d33a39e57768 ("io_uring: keep timeout in io_wait_queue")
46ae7eef44f6 ("io_uring: optimise non-timeout waiting")
846072f16eed ("io_uring: mimimise io_cqring_wait_schedule")
3fcf19d592d5 ("io_uring: parse check_cq out of wq waiting")
12521a5d5cb7 ("io_uring: fix CQ waiting timeout handling")
52ea806ad983 ("io_uring: finish waiting before flushing overflow entries")
35d90f95cfa7 ("io_uring: include task_work run after scheduling in wait for events")
1b346e4aa8e7 ("io_uring: don't check overflow flush failures")
a85381d8326d ("io_uring: skip overflow CQE posting for dying ring")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7b72d661f1f2f950ab8c12de7e2bc48bdac8ed69 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Mon, 24 Jul 2023 11:28:17 -0600
Subject: [PATCH] io_uring: gate iowait schedule on having pending requests
A previous commit made all cqring waits marked as iowait, as a way to
improve performance for short schedules with pending IO. However, for
use cases that have a special reaper thread that does nothing but
wait on events on the ring, this causes a cosmetic issue where we
know have one core marked as being "busy" with 100% iowait.
While this isn't a grave issue, it is confusing to users. Rather than
always mark us as being in iowait, gate setting of current->in_iowait
to 1 by whether or not the waiting task has pending requests.
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/io-uring/CAMEGJJ2RxopfNQ7GNLhr7X9=bHXKo+G5OOe0LUq=+…
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217699
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217700
Reported-by: Oleksandr Natalenko <oleksandr(a)natalenko.name>
Reported-by: Phil Elwell <phil(a)raspberrypi.com>
Tested-by: Andres Freund <andres(a)anarazel.de>
Fixes: 8a796565cec3 ("io_uring: Use io_schedule* in cqring wait")
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 89a611541bc4..f4591b912ea8 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2493,11 +2493,20 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx)
return 0;
}
+static bool current_pending_io(void)
+{
+ struct io_uring_task *tctx = current->io_uring;
+
+ if (!tctx)
+ return false;
+ return percpu_counter_read_positive(&tctx->inflight);
+}
+
/* when returns >0, the caller should retry */
static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq)
{
- int token, ret;
+ int io_wait, ret;
if (unlikely(READ_ONCE(ctx->check_cq)))
return 1;
@@ -2511,17 +2520,19 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
return 0;
/*
- * Use io_schedule_prepare/finish, so cpufreq can take into account
- * that the task is waiting for IO - turns out to be important for low
- * QD IO.
+ * Mark us as being in io_wait if we have pending requests, so cpufreq
+ * can take into account that the task is waiting for IO - turns out
+ * to be important for low QD IO.
*/
- token = io_schedule_prepare();
+ io_wait = current->in_iowait;
+ if (current_pending_io())
+ current->in_iowait = 1;
ret = 0;
if (iowq->timeout == KTIME_MAX)
schedule();
else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
ret = -ETIME;
- io_schedule_finish(token);
+ current->in_iowait = io_wait;
return ret;
}
We found an issue with null pointer access due to kprobe debug exception
error handling on 5.10, and I proposed a separate fix patch for 5.10,
see [1]. But as Greg gave advice, we always choose to backport relevant
patches from upstream to fix issues with stable kernels, so I made this
patch set.
The main one we need to backport is patch 5, which uses int3 instead of
debug trap for single-stepping, thus avoiding the problems we
encountered with kprobe debug exception error handling. Patches 1-4 are
pre-patches, and patches 6-9 are fixes for patch 5. The major
modifications are patch 2 and patch 5. Patch 2 optimizes
resume_execution() to avoid repeated instruction decoding, and patch 5
uses int3 instead of debug trap, and as Masami said in the commit
message this patch will change some behavior of kprobe, but it has
almost no effect on the actual usage.
Please let me know if there are any problems, thanks!
[1] https://lore.kernel.org/lkml/20230630020845.227939-1-lihuafei1@huawei.com/
Gustavo A. R. Silva (1):
kprobes/x86: Fix fall-through warnings for Clang
Masami Hiramatsu (5):
x86/kprobes: Do not decode opcode in resume_execution()
x86/kprobes: Retrieve correct opcode for group instruction
x86/kprobes: Identify far indirect JMP correctly
x86/kprobes: Use int3 instead of debug trap for single-step
x86/kprobes: Fix to identify indirect jmp and others using range case
Masami Hiramatsu (Google) (1):
x86/kprobes: Update kcb status flag after singlestepping
Nadav Amit (1):
x86/kprobes: Fix JNG/JNLE emulation
Wei Yongjun (1):
x86/kprobes: Move 'inline' to the beginning of the kprobe_is_ss()
declaration
arch/x86/include/asm/kprobes.h | 24 +-
arch/x86/kernel/kprobes/core.c | 639 ++++++++++++++++++++-------------
arch/x86/kernel/traps.c | 3 -
3 files changed, 409 insertions(+), 257 deletions(-)
--
2.17.1
NOTE: This patch is tested against 5.4 stable
NOTE: This is a patch for the 5.4 stable branch, not for the torvalds tree.
The torvalds tree, and stable tree 5.10, 5.15, 6.1 and 6.4 branches
were fixed in the separate
commit ID 4acfe3dfde68 ("test_firmware: prevent race conditions by a correct implementation of locking")
which was incompatible with 5.4
Dan Carpenter spotted a race condition in a couple of situations like
these in the test_firmware driver:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
u8 val;
int ret;
ret = kstrtou8(buf, 10, &val);
if (ret)
return ret;
mutex_lock(&test_fw_mutex);
*(u8 *)cfg = val;
mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
static ssize_t config_num_requests_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int rc;
mutex_lock(&test_fw_mutex);
if (test_fw_config->reqs) {
pr_err("Must call release_all_firmware prior to changing config\n");
rc = -EINVAL;
mutex_unlock(&test_fw_mutex);
goto out;
}
mutex_unlock(&test_fw_mutex);
// NOTE: HERE is the race!!! Function can be preempted!
// test_fw_config->reqs can change between the release of
// the lock about and acquire of the lock in the
// test_dev_config_update_u8()
rc = test_dev_config_update_u8(buf, count,
&test_fw_config->num_requests);
out:
return rc;
}
static ssize_t config_read_fw_idx_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
return test_dev_config_update_u8(buf, count,
&test_fw_config->read_fw_idx);
}
The function test_dev_config_update_u8() is called from both the locked
and the unlocked context, function config_num_requests_store() and
config_read_fw_idx_store() which can both be called asynchronously as
they are driver's methods, while test_dev_config_update_u8() and siblings
change their argument pointed to by u8 *cfg or similar pointer.
To avoid deadlock on test_fw_mutex, the lock is dropped before calling
test_dev_config_update_u8() and re-acquired within test_dev_config_update_u8()
itself, but alas this creates a race condition.
Having two locks wouldn't assure a race-proof mutual exclusion.
This situation is best avoided by the introduction of a new, unlocked
function __test_dev_config_update_u8() which can be called from the locked
context and reducing test_dev_config_update_u8() to:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
mutex_lock(&test_fw_mutex);
ret = __test_dev_config_update_u8(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
}
doing the locking and calling the unlocked primitive, which enables both
locked and unlocked versions without duplication of code.
Fixes: c92316bf8e948 ("test_firmware: add batched firmware tests")
Cc: Luis R. Rodriguez <mcgrof(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Russ Weight <russell.h.weight(a)intel.com>
Cc: Takashi Iwai <tiwai(a)suse.de>
Cc: Tianfei Zhang <tianfei.zhang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v5.4
Suggested-by: Dan Carpenter <error27(a)gmail.com>
Link: https://lore.kernel.org/r/20230509084746.48259-1-mirsad.todorovac@alu.unizg…
Signed-off-by: Mirsad Todorovac <mirsad.todorovac(a)alu.unizg.hr>
---
lib/test_firmware.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 38553944e967..92d7195d5b5b 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -301,16 +301,26 @@ static ssize_t config_test_show_str(char *dst,
return len;
}
-static int test_dev_config_update_bool(const char *buf, size_t size,
- bool *cfg)
+static inline int __test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
{
int ret;
- mutex_lock(&test_fw_mutex);
if (strtobool(buf, cfg) < 0)
ret = -EINVAL;
else
ret = size;
+
+ return ret;
+}
+
+static int test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_bool(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
@@ -340,7 +350,7 @@ static ssize_t test_dev_config_show_int(char *buf, int cfg)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
-static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+static inline int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
long new;
@@ -352,14 +362,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
if (new > U8_MAX)
return -EINVAL;
- mutex_lock(&test_fw_mutex);
*(u8 *)cfg = new;
- mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
+static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_u8(buf, size, cfg);
+ mutex_unlock(&test_fw_mutex);
+
+ return ret;
+}
+
static ssize_t test_dev_config_show_u8(char *buf, u8 cfg)
{
u8 val;
@@ -392,10 +411,10 @@ static ssize_t config_num_requests_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_u8(buf, count,
- &test_fw_config->num_requests);
+ rc = __test_dev_config_update_u8(buf, count,
+ &test_fw_config->num_requests);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
--
2.34.1
During recent vma locking patch reviews Linus and Jann Horn noted a number
of issues with vma locking and suggested improvements:
1. walk_page_range() does not have ability to write-lock a vma during the
walk when it's done under mmap_write_lock. For example s390_reset_cmma().
2. Vma locking is hidden inside vm_flags modifiers and is hard to follow.
Suggestion is to change vm_flags_reset{_once} to assert that vma is
write-locked and require an explicit locking.
3. Same issue with vma_prepare() hiding vma locking.
4. In userfaultfd vm_flags are modified after vma->vm_userfaultfd_ctx and
page faults can operate on a context while it's changed.
5. do_brk_flags() and __install_special_mapping() not locking a newly
created vma before adding it into the mm. While not strictly a problem,
this is fragile if vma is modified after insertion, as in the
mmap_region() case which was recently fixed. Suggestion is to always lock
a new vma before inserting it and making it visible to page faults.
6. vma_assert_write_locked() for CONFIG_PER_VMA_LOCK=n would benefit from
being mmap_assert_write_locked() instead of no-op and then any place which
operates on a vma and calls mmap_assert_write_locked() can be converted
into vma_assert_write_locked().
I CC'ed stable only on the first patch because others are cleanups and the
bug in userfaultfd does not affect stable (lock_vma_under_rcu prevents
uffds from being handled under vma lock protection). However I would be
happy if the whole series is merged into stable 6.4 since it makes vma
locking more maintainable.
The patches apply cleanly over Linus' ToT and will conflict when applied
over mm-unstable due to missing [1]. The conflict can be easily resolved
by ignoring conflicting deletions but probably simpler to take [1] into
mm-unstable and avoid later conflict.
[1] commit 6c21e066f925 ("mm/mempolicy: Take VMA lock before replacing policy")
Changes since v1:
- replace walk_page_range() parameter with mm_walk_ops.walk_lock,
per Linus
- introduced page_walk_lock enum to allow different locking modes
during a walk, per Linus
- added Liam's Reviewed-by
Suren Baghdasaryan (6):
mm: enable page walking API to lock vmas during the walk
mm: for !CONFIG_PER_VMA_LOCK equate write lock assertion for vma and
mmap
mm: replace mmap with vma write lock assertions when operating on a
vma
mm: lock vma explicitly before doing vm_flags_reset and
vm_flags_reset_once
mm: always lock new vma before inserting into vma tree
mm: move vma locking out of vma_prepare
arch/powerpc/kvm/book3s_hv_uvmem.c | 1 +
arch/powerpc/mm/book3s64/subpage_prot.c | 1 +
arch/riscv/mm/pageattr.c | 1 +
arch/s390/mm/gmap.c | 5 ++++
drivers/infiniband/hw/hfi1/file_ops.c | 1 +
fs/proc/task_mmu.c | 5 ++++
fs/userfaultfd.c | 6 +++++
include/linux/mm.h | 13 ++++++---
include/linux/pagewalk.h | 11 ++++++++
mm/damon/vaddr.c | 2 ++
mm/hmm.c | 1 +
mm/hugetlb.c | 2 +-
mm/khugepaged.c | 5 ++--
mm/ksm.c | 25 ++++++++++-------
mm/madvise.c | 8 +++---
mm/memcontrol.c | 2 ++
mm/memory-failure.c | 1 +
mm/memory.c | 2 +-
mm/mempolicy.c | 22 +++++++++------
mm/migrate_device.c | 1 +
mm/mincore.c | 1 +
mm/mlock.c | 4 ++-
mm/mmap.c | 29 +++++++++++++-------
mm/mprotect.c | 2 ++
mm/pagewalk.c | 36 ++++++++++++++++++++++---
mm/vmscan.c | 1 +
26 files changed, 146 insertions(+), 42 deletions(-)
--
2.41.0.585.gd2178a4bd4-goog
When setting ZT0 via ptrace we do not currently force a reload of the
floating point register state from memory, do that to ensure that the newly
set value gets loaded into the registers on next task execution.
The function was templated off the function for FPSIMD which due to our
providing the option of embedding a FPSIMD regset within the SVE regset
does not directly include the flush.
Fixes: f90b529bcbe5 ("arm64/sme: Implement ZT0 ptrace support")
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
arch/arm64/kernel/ptrace.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index d7f4f0d1ae12..740e81e9db04 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1180,6 +1180,8 @@ static int zt_set(struct task_struct *target,
if (ret == 0)
target->thread.svcr |= SVCR_ZA_MASK;
+ fpsimd_flush_task_state(target);
+
return ret;
}
---
base-commit: 5d0c230f1de8c7515b6567d9afba1f196fb4e2f4
change-id: 20230802-arm64-fix-ptrace-zt0-flush-d6d71b9f8461
Best regards,
--
Mark Brown <broonie(a)kernel.org>
When setting SME vector lengths we clear TIF_SME to reenable SME traps,
doing a reallocation of the backing storage on next use. We do this using
clear_thread_flag() which operates on the current thread, meaning that when
setting the vector length via ptrace we may both not force traps for the
target task and force a spurious flush of any SME state that the tracing
task may have.
Clear the flag in the target task.
Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers")
Reported-by: David Spickett <David.Spickett(a)arm.com>
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
arch/arm64/kernel/fpsimd.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 520b681a07bb..a61a1fd6492d 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -909,7 +909,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
*/
task->thread.svcr &= ~(SVCR_SM_MASK |
SVCR_ZA_MASK);
- clear_thread_flag(TIF_SME);
+ clear_tsk_thread_flag(task, TIF_SME);
free_sme = true;
}
}
---
base-commit: 5d0c230f1de8c7515b6567d9afba1f196fb4e2f4
change-id: 20230802-arm64-fix-ptrace-tif-sme-0bfd94c8266d
Best regards,
--
Mark Brown <broonie(a)kernel.org>
The original patches fixing CVE-2023-1076 are incorrect in my opinion.
This small series fixes them up; see the individual commit messages for
explanation.
I have a very elaborate test procedure demonstrating the problem for
both tun and tap; it involves libvirt, qemu, and "crash". I can share
that procedure if necessary, but it's indeed quite long (I wrote it
originally for our QE team).
The patches in this series are supposed to "re-fix" CVE-2023-1076; given
that said CVE is classified as Low Impact (CVSSv3=5.5), I'm posting this
publicly, and not suggesting any embargo. Red Hat Product Security may
assign a new CVE number later.
I've tested the patches on top of v6.5-rc4, with "crash" built at commit
c74f375e0ef7.
Cc: Eric Dumazet <edumazet(a)google.com>
Cc: Lorenzo Colitti <lorenzo(a)google.com>
Cc: Paolo Abeni <pabeni(a)redhat.com>
Cc: Pietro Borrello <borrello(a)diag.uniroma1.it>
Cc: netdev(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
Laszlo Ersek (2):
net: tun_chr_open(): set sk_uid from current_fsuid()
net: tap_open(): set sk_uid from current_fsuid()
drivers/net/tap.c | 2 +-
drivers/net/tun.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
base-commit: 5d0c230f1de8c7515b6567d9afba1f196fb4e2f4
[ Upstream commit 4acfe3dfde685a5a9eaec5555351918e2d7266a1 ]
Dan Carpenter spotted a race condition in a couple of situations like
these in the test_firmware driver:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
u8 val;
int ret;
ret = kstrtou8(buf, 10, &val);
if (ret)
return ret;
mutex_lock(&test_fw_mutex);
*(u8 *)cfg = val;
mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
static ssize_t config_num_requests_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int rc;
mutex_lock(&test_fw_mutex);
if (test_fw_config->reqs) {
pr_err("Must call release_all_firmware prior to changing config\n");
rc = -EINVAL;
mutex_unlock(&test_fw_mutex);
goto out;
}
mutex_unlock(&test_fw_mutex);
// NOTE: HERE is the race!!! Function can be preempted!
// test_fw_config->reqs can change between the release of
// the lock about and acquire of the lock in the
// test_dev_config_update_u8()
rc = test_dev_config_update_u8(buf, count,
&test_fw_config->num_requests);
out:
return rc;
}
static ssize_t config_read_fw_idx_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
return test_dev_config_update_u8(buf, count,
&test_fw_config->read_fw_idx);
}
The function test_dev_config_update_u8() is called from both the locked
and the unlocked context, function config_num_requests_store() and
config_read_fw_idx_store() which can both be called asynchronously as
they are driver's methods, while test_dev_config_update_u8() and siblings
change their argument pointed to by u8 *cfg or similar pointer.
To avoid deadlock on test_fw_mutex, the lock is dropped before calling
test_dev_config_update_u8() and re-acquired within test_dev_config_update_u8()
itself, but alas this creates a race condition.
Having two locks wouldn't assure a race-proof mutual exclusion.
This situation is best avoided by the introduction of a new, unlocked
function __test_dev_config_update_u8() which can be called from the locked
context and reducing test_dev_config_update_u8() to:
static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
mutex_lock(&test_fw_mutex);
ret = __test_dev_config_update_u8(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
}
doing the locking and calling the unlocked primitive, which enables both
locked and unlocked versions without duplication of code.
Fixes: c92316bf8e948 ("test_firmware: add batched firmware tests")
Cc: Luis R. Rodriguez <mcgrof(a)kernel.org>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Russ Weight <russell.h.weight(a)intel.com>
Cc: Takashi Iwai <tiwai(a)suse.de>
Cc: Tianfei Zhang <tianfei.zhang(a)intel.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Colin Ian King <colin.i.king(a)gmail.com>
Cc: Randy Dunlap <rdunlap(a)infradead.org>
Cc: linux-kselftest(a)vger.kernel.org
Cc: stable(a)vger.kernel.org # v5.4, 4.19
Suggested-by: Dan Carpenter <error27(a)gmail.com>
Link: https://lore.kernel.org/r/20230509084746.48259-1-mirsad.todorovac@alu.unizg…
Signed-off-by: Mirsad Todorovac <mirsad.todorovac(a)alu.unizg.hr>
[ This is the patch to fix the racing condition in locking for the 5.4, ]
[ 4.19 and 4.4 stable branches. Not all the fixes from the upstream ]
[ commit apply, but those which do are verbatim equal to those in the ]
[ upstream commit. ]
---
v2:
bundled locking and ENOSPC patches together.
tested on 5.4 and 4.19 stable.
lib/test_firmware.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 38553944e967..92d7195d5b5b 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -301,16 +301,26 @@ static ssize_t config_test_show_str(char *dst,
return len;
}
-static int test_dev_config_update_bool(const char *buf, size_t size,
- bool *cfg)
+static inline int __test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
{
int ret;
- mutex_lock(&test_fw_mutex);
if (strtobool(buf, cfg) < 0)
ret = -EINVAL;
else
ret = size;
+
+ return ret;
+}
+
+static int test_dev_config_update_bool(const char *buf, size_t size,
+ bool *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_bool(buf, size, cfg);
mutex_unlock(&test_fw_mutex);
return ret;
@@ -340,7 +350,7 @@ static ssize_t test_dev_config_show_int(char *buf, int cfg)
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
-static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+static inline int __test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
{
int ret;
long new;
@@ -352,14 +362,23 @@ static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
if (new > U8_MAX)
return -EINVAL;
- mutex_lock(&test_fw_mutex);
*(u8 *)cfg = new;
- mutex_unlock(&test_fw_mutex);
/* Always return full write size even if we didn't consume all */
return size;
}
+static int test_dev_config_update_u8(const char *buf, size_t size, u8 *cfg)
+{
+ int ret;
+
+ mutex_lock(&test_fw_mutex);
+ ret = __test_dev_config_update_u8(buf, size, cfg);
+ mutex_unlock(&test_fw_mutex);
+
+ return ret;
+}
+
static ssize_t test_dev_config_show_u8(char *buf, u8 cfg)
{
u8 val;
@@ -392,10 +411,10 @@ static ssize_t config_num_requests_store(struct device *dev,
mutex_unlock(&test_fw_mutex);
goto out;
}
- mutex_unlock(&test_fw_mutex);
- rc = test_dev_config_update_u8(buf, count,
- &test_fw_config->num_requests);
+ rc = __test_dev_config_update_u8(buf, count,
+ &test_fw_config->num_requests);
+ mutex_unlock(&test_fw_mutex);
out:
return rc;
--
2.39.3
Unfortunately commit 474098edac26 ("mm/gup: replace FOLL_NUMA by
gup_can_follow_protnone()") missed that follow_page() and
follow_trans_huge_pmd() never implicitly set FOLL_NUMA because they really
don't want to fail on PROT_NONE-mapped pages -- either due to NUMA hinting
or due to inaccessible (PROT_NONE) VMAs.
As spelled out in commit 0b9d705297b2 ("mm: numa: Support NUMA hinting page
faults from gup/gup_fast"): "Other follow_page callers like KSM should not
use FOLL_NUMA, or they would fail to get the pages if they use follow_page
instead of get_user_pages."
liubo reported [1] that smaps_rollup results are imprecise, because they
miss accounting of pages that are mapped PROT_NONE. Further, it's easy
to reproduce that KSM no longer works on inaccessible VMAs on x86-64,
because pte_protnone()/pmd_protnone() also indictaes "true" in
inaccessible VMAs, and follow_page() refuses to return such pages right
now.
As KVM really depends on these NUMA hinting faults, removing the
pte_protnone()/pmd_protnone() handling in GUP code completely is not really
an option.
To fix the issues at hand, let's revive FOLL_NUMA as FOLL_HONOR_NUMA_FAULT
to restore the original behavior for now and add better comments.
Set FOLL_HONOR_NUMA_FAULT independent of FOLL_FORCE in
is_valid_gup_args(), to add that flag for all external GUP users.
Note that there are three GUP-internal __get_user_pages() users that don't
end up calling is_valid_gup_args() and consequently won't get
FOLL_HONOR_NUMA_FAULT set.
1) get_dump_page(): we really don't want to handle NUMA hinting
faults. It specifies FOLL_FORCE and wouldn't have honored NUMA
hinting faults already.
2) populate_vma_page_range(): we really don't want to handle NUMA hinting
faults. It specifies FOLL_FORCE on accessible VMAs, so it wouldn't have
honored NUMA hinting faults already.
3) faultin_vma_page_range(): we similarly don't want to handle NUMA
hinting faults.
To make the combination of FOLL_FORCE and FOLL_HONOR_NUMA_FAULT work in
inaccessible VMAs properly, we have to perform VMA accessibility checks in
gup_can_follow_protnone().
As GUP-fast should reject such pages either way in
pte_access_permitted()/pmd_access_permitted() -- for example on x86-64 and
arm64 that both implement pte_protnone() -- let's just always fallback
to ordinary GUP when stumbling over pte_protnone()/pmd_protnone().
As Linus notes [2], honoring NUMA faults might only make sense for
selected GUP users.
So we should really see if we can instead let relevant GUP callers specify
it manually, and not trigger NUMA hinting faults from GUP as default.
Prepare for that by making FOLL_HONOR_NUMA_FAULT an external GUP flag
and adding appropriate documenation.
While at it, remove a stale comment from follow_trans_huge_pmd(): That
comment for pmd_protnone() was added in commit 2b4847e73004 ("mm: numa:
serialise parallel get_user_page against THP migration"), which noted:
THP does not unmap pages due to a lack of support for migration
entries at a PMD level. This allows races with get_user_pages
Nowadays, we do have PMD migration entries, so the comment no longer
applies. Let's drop it.
[1] https://lore.kernel.org/r/20230726073409.631838-1-liubo254@huawei.com
[2] https://lore.kernel.org/r/CAHk-=wgRiP_9X0rRdZKT8nhemZGNateMtb366t37d8-x7VRs…
Reported-by: liubo <liubo254(a)huawei.com>
Closes: https://lore.kernel.org/r/20230726073409.631838-1-liubo254@huawei.com
Reported-by: Peter Xu <peterx(a)redhat.com>
Closes: https://lore.kernel.org/all/ZMKJjDaqZ7FW0jfe@x1n/
Fixes: 474098edac26 ("mm/gup: replace FOLL_NUMA by gup_can_follow_protnone()")
Acked-by: Mel Gorman <mgorman(a)techsingularity.net>
Acked-by: Peter Xu <peterx(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: David Hildenbrand <david(a)redhat.com>
---
include/linux/mm.h | 21 +++++++++++++++------
include/linux/mm_types.h | 9 +++++++++
mm/gup.c | 30 ++++++++++++++++++++++++------
mm/huge_memory.c | 3 +--
4 files changed, 49 insertions(+), 14 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63edff994c32..ba38b78a1b84 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3525,15 +3525,24 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
* Indicates whether GUP can follow a PROT_NONE mapped page, or whether
* a (NUMA hinting) fault is required.
*/
-static inline bool gup_can_follow_protnone(unsigned int flags)
+static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,
+ unsigned int flags)
{
/*
- * FOLL_FORCE has to be able to make progress even if the VMA is
- * inaccessible. Further, FOLL_FORCE access usually does not represent
- * application behaviour and we should avoid triggering NUMA hinting
- * faults.
+ * If callers don't want to honor NUMA hinting faults, no need to
+ * determine if we would actually have to trigger a NUMA hinting fault.
*/
- return flags & FOLL_FORCE;
+ if (!(flags & FOLL_HONOR_NUMA_FAULT))
+ return true;
+
+ /*
+ * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs.
+ *
+ * Requiring a fault here even for inaccessible VMAs would mean that
+ * FOLL_FORCE cannot make any progress, because handle_mm_fault()
+ * refuses to process NUMA hinting faults in inaccessible VMAs.
+ */
+ return !vma_is_accessible(vma);
}
typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index aae6af098031..291c05cacd48 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1278,6 +1278,15 @@ enum {
FOLL_PCI_P2PDMA = 1 << 10,
/* allow interrupts from generic signals */
FOLL_INTERRUPTIBLE = 1 << 11,
+ /*
+ * Always honor (trigger) NUMA hinting faults.
+ *
+ * FOLL_WRITE implicitly honors NUMA hinting faults because a
+ * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE
+ * apply). get_user_pages_fast_only() always implicitly honors NUMA
+ * hinting faults.
+ */
+ FOLL_HONOR_NUMA_FAULT = 1 << 12,
/* See also internal only FOLL flags in mm/internal.h */
};
diff --git a/mm/gup.c b/mm/gup.c
index 2493ffa10f4b..3bbfae411880 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -597,7 +597,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
pte = ptep_get(ptep);
if (!pte_present(pte))
goto no_page;
- if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+ if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
goto no_page;
page = vm_normal_page(vma, address, pte);
@@ -714,7 +714,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
if (likely(!pmd_trans_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
- if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
+ if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
return no_page_table(vma, flags);
ptl = pmd_lock(mm, pmd);
@@ -844,6 +844,10 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
return NULL;
+ /*
+ * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect
+ * to fail on PROT_NONE-mapped pages.
+ */
page = follow_page_mask(vma, address, foll_flags, &ctx);
if (ctx.pgmap)
put_dev_pagemap(ctx.pgmap);
@@ -2240,6 +2244,13 @@ static bool is_valid_gup_args(struct page **pages, int *locked,
gup_flags |= FOLL_UNLOCKABLE;
}
+ /*
+ * For now, always trigger NUMA hinting faults. Some GUP users like
+ * KVM require the hint to be as the calling context of GUP is
+ * functionally similar to a memory reference from task context.
+ */
+ gup_flags |= FOLL_HONOR_NUMA_FAULT;
+
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
@@ -2564,7 +2575,14 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
struct page *page;
struct folio *folio;
- if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+ /*
+ * Always fallback to ordinary GUP on PROT_NONE-mapped pages:
+ * pte_access_permitted() better should reject these pages
+ * either way: otherwise, GUP-fast might succeed in
+ * cases where ordinary GUP would fail due to VMA access
+ * permissions.
+ */
+ if (pte_protnone(pte))
goto pte_unmap;
if (!pte_access_permitted(pte, flags & FOLL_WRITE))
@@ -2983,8 +3001,8 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
pmd_devmap(pmd))) {
- if (pmd_protnone(pmd) &&
- !gup_can_follow_protnone(flags))
+ /* See gup_pte_range() */
+ if (pmd_protnone(pmd))
return 0;
if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
@@ -3164,7 +3182,7 @@ static int internal_get_user_pages_fast(unsigned long start,
if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
FOLL_FORCE | FOLL_PIN | FOLL_GET |
FOLL_FAST_ONLY | FOLL_NOFAULT |
- FOLL_PCI_P2PDMA)))
+ FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT)))
return -EINVAL;
if (gup_flags & FOLL_PIN)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2e2e8a24cc71..0b709d2c46c6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1467,8 +1467,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
return ERR_PTR(-EFAULT);
- /* Full NUMA hinting faults to serialise migration in fault paths */
- if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))
+ if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))
return NULL;
if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))
--
2.41.0
Hello,
Since installing the latest updates for Linux Mint yesterday or this
morning, YouTube is acting super goofy via Brave Browser. I cannot
search for anything at all. Nothing happens. Secondly, it is
occasionally reporting that it is disconnected from the internet and to
try again. Firefox works fine, however. I reported the issue to Brave,
and have now communicated this with you to be certain everyone is aware.
I have some other stuff to discuss with you when you are available. I
have information that you may find helpful and disturbing regarding
Linux security. If you would like to discuss this, please reply.
My System:
Kernel: 5.4.0-155-generic x86_64 bits: 64 compiler: gcc v: 9.4.0
Desktop: Cinnamon 5.2.7 wm: muffin dm: LightDM Distro: Linux Mint 20.3 Una
base: Ubuntu 20.04 focal
Regards,
Misty Plianca
We don't want absolute symbols references in the stub, so fix the double
negation in the comment.
Fixes: d7071743db31 ("RISC-V: Add EFI stub support.")
Cc: stable(a)vger.kernel.org
Signed-off-by: Xiao Wang <xiao.w.wang(a)intel.com>
---
drivers/firmware/efi/libstub/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index b0f8c495c10f..ed6e8ebd89b4 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -146,7 +146,7 @@ STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS
# For RISC-V, we don't need anything special other than arm64. Keep all the
# symbols in .init section and make sure that no absolute symbols references
-# doesn't exist.
+# exist.
STUBCOPY_FLAGS-$(CONFIG_RISCV) += --prefix-alloc-sections=.init \
--prefix-symbols=__efistub_
STUBCOPY_RELOC-$(CONFIG_RISCV) := R_RISCV_HI20
--
2.25.1
The commit ef9ff6017e3c4 ("perf ui browser: Move the extra title lines
from the hists browser") introduced ui_browser__gotorc_title() to help
moving non-title lines easily. But it missed to update the title for
the hierarchy mode so it won't print the header line on TUI at all.
$ perf report --hierarchy
Fixes: ef9ff6017e3c4 ("perf ui browser: Move the extra title lines from the hists browser")
Cc: stable(a)vger.kernel.org
Signed-off-by: Namhyung Kim <namhyung(a)kernel.org>
---
tools/perf/ui/browsers/hists.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index c7ad9e003080..d8b88f10a48d 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1779,7 +1779,7 @@ static void hists_browser__hierarchy_headers(struct hist_browser *browser)
hists_browser__scnprintf_hierarchy_headers(browser, headers,
sizeof(headers));
- ui_browser__gotorc(&browser->b, 0, 0);
+ ui_browser__gotorc_title(&browser->b, 0, 0);
ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
}
--
2.41.0.487.g6d72f3e995-goog
From: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
The sc16is7xx_config_rs485() function is called only for the second
port (index 1, channel B), causing initialization problems for the
first port.
For the sc16is7xx driver, port->membase and port->mapbase are not set,
and their default values are 0. And we set port->iobase to the device
index. This means that when the first device is registered using the
uart_add_one_port() function, the following values will be in the port
structure:
port->membase = 0
port->mapbase = 0
port->iobase = 0
Therefore, the function uart_configure_port() in serial_core.c will
exit early because of the following check:
/*
* If there isn't a port here, don't do anything further.
*/
if (!port->iobase && !port->mapbase && !port->membase)
return;
Typically, I2C and SPI drivers do not set port->membase and
port->mapbase.
The max310x driver sets port->membase to ~0 (all ones). By
implementing the same change in this driver, uart_configure_port() is
now correctly executed for all ports.
Fixes: dfeae619d781 ("serial: sc16is7xx")
Cc: <stable(a)vger.kernel.org> # 6.1.x
Signed-off-by: Hugo Villeneuve <hvilleneuve(a)dimonoff.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Reviewed-by: Lech Perczak <lech.perczak(a)camlingroup.com>
Tested-by: Lech Perczak <lech.perczak(a)camlingroup.com>
---
drivers/tty/serial/sc16is7xx.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 2e7e7c409cf2..8ae2afc76a9b 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -1436,6 +1436,7 @@ static int sc16is7xx_probe(struct device *dev,
s->p[i].port.fifosize = SC16IS7XX_FIFO_SIZE;
s->p[i].port.flags = UPF_FIXED_TYPE | UPF_LOW_LATENCY;
s->p[i].port.iobase = i;
+ s->p[i].port.membase = (void __iomem *)~0;
s->p[i].port.iotype = UPIO_PORT;
s->p[i].port.uartclk = freq;
s->p[i].port.rs485_config = sc16is7xx_config_rs485;
--
2.30.2
From: Tuo Li <islituo(a)gmail.com>
[ Upstream commit 0e881c0a4b6146b7e856735226208f48251facd8 ]
The variable phba->fcf.fcf_flag is often protected by the lock
phba->hbalock() when is accessed. Here is an example in
lpfc_unregister_fcf_rescan():
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag |= FCF_INIT_DISC;
spin_unlock_irq(&phba->hbalock);
However, in the same function, phba->fcf.fcf_flag is assigned with 0
without holding the lock, and thus can cause a data race:
phba->fcf.fcf_flag = 0;
To fix this possible data race, a lock and unlock pair is added when
accessing the variable phba->fcf.fcf_flag.
Reported-by: BassCheck <bass(a)buaa.edu.cn>
Signed-off-by: Tuo Li <islituo(a)gmail.com>
Link: https://lore.kernel.org/r/20230630024748.1035993-1-islituo@gmail.com
Reviewed-by: Justin Tee <justin.tee(a)broadcom.com>
Reviewed-by: Laurence Oberman <loberman(a)redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/scsi/lpfc/lpfc_hbadisc.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 4a0889dd4c1d0..709b24a812b5d 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -6429,7 +6429,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
if (rc)
return;
/* Reset HBA FCF states after successful unregister FCF */
+ spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
phba->fcf.current_rec.flag = 0;
/*
--
2.40.1
From: Tuo Li <islituo(a)gmail.com>
[ Upstream commit 0e881c0a4b6146b7e856735226208f48251facd8 ]
The variable phba->fcf.fcf_flag is often protected by the lock
phba->hbalock() when is accessed. Here is an example in
lpfc_unregister_fcf_rescan():
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag |= FCF_INIT_DISC;
spin_unlock_irq(&phba->hbalock);
However, in the same function, phba->fcf.fcf_flag is assigned with 0
without holding the lock, and thus can cause a data race:
phba->fcf.fcf_flag = 0;
To fix this possible data race, a lock and unlock pair is added when
accessing the variable phba->fcf.fcf_flag.
Reported-by: BassCheck <bass(a)buaa.edu.cn>
Signed-off-by: Tuo Li <islituo(a)gmail.com>
Link: https://lore.kernel.org/r/20230630024748.1035993-1-islituo@gmail.com
Reviewed-by: Justin Tee <justin.tee(a)broadcom.com>
Reviewed-by: Laurence Oberman <loberman(a)redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/scsi/lpfc/lpfc_hbadisc.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 5d657178c2b98..8be1c0b4e2a86 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -6443,7 +6443,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
if (rc)
return;
/* Reset HBA FCF states after successful unregister FCF */
+ spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
phba->fcf.current_rec.flag = 0;
/*
--
2.40.1
From: Tuo Li <islituo(a)gmail.com>
[ Upstream commit 0e881c0a4b6146b7e856735226208f48251facd8 ]
The variable phba->fcf.fcf_flag is often protected by the lock
phba->hbalock() when is accessed. Here is an example in
lpfc_unregister_fcf_rescan():
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag |= FCF_INIT_DISC;
spin_unlock_irq(&phba->hbalock);
However, in the same function, phba->fcf.fcf_flag is assigned with 0
without holding the lock, and thus can cause a data race:
phba->fcf.fcf_flag = 0;
To fix this possible data race, a lock and unlock pair is added when
accessing the variable phba->fcf.fcf_flag.
Reported-by: BassCheck <bass(a)buaa.edu.cn>
Signed-off-by: Tuo Li <islituo(a)gmail.com>
Link: https://lore.kernel.org/r/20230630024748.1035993-1-islituo@gmail.com
Reviewed-by: Justin Tee <justin.tee(a)broadcom.com>
Reviewed-by: Laurence Oberman <loberman(a)redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/scsi/lpfc/lpfc_hbadisc.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 0abce779fbb13..12602b161bd80 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -6699,7 +6699,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
if (rc)
return;
/* Reset HBA FCF states after successful unregister FCF */
+ spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
phba->fcf.current_rec.flag = 0;
/*
--
2.40.1
From: Tuo Li <islituo(a)gmail.com>
[ Upstream commit 0e881c0a4b6146b7e856735226208f48251facd8 ]
The variable phba->fcf.fcf_flag is often protected by the lock
phba->hbalock() when is accessed. Here is an example in
lpfc_unregister_fcf_rescan():
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag |= FCF_INIT_DISC;
spin_unlock_irq(&phba->hbalock);
However, in the same function, phba->fcf.fcf_flag is assigned with 0
without holding the lock, and thus can cause a data race:
phba->fcf.fcf_flag = 0;
To fix this possible data race, a lock and unlock pair is added when
accessing the variable phba->fcf.fcf_flag.
Reported-by: BassCheck <bass(a)buaa.edu.cn>
Signed-off-by: Tuo Li <islituo(a)gmail.com>
Link: https://lore.kernel.org/r/20230630024748.1035993-1-islituo@gmail.com
Reviewed-by: Justin Tee <justin.tee(a)broadcom.com>
Reviewed-by: Laurence Oberman <loberman(a)redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/scsi/lpfc/lpfc_hbadisc.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 68ff233f936e5..3ff76ca147a5a 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -6790,7 +6790,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
if (rc)
return;
/* Reset HBA FCF states after successful unregister FCF */
+ spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
phba->fcf.current_rec.flag = 0;
/*
--
2.40.1
From: Tuo Li <islituo(a)gmail.com>
[ Upstream commit 0e881c0a4b6146b7e856735226208f48251facd8 ]
The variable phba->fcf.fcf_flag is often protected by the lock
phba->hbalock() when is accessed. Here is an example in
lpfc_unregister_fcf_rescan():
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag |= FCF_INIT_DISC;
spin_unlock_irq(&phba->hbalock);
However, in the same function, phba->fcf.fcf_flag is assigned with 0
without holding the lock, and thus can cause a data race:
phba->fcf.fcf_flag = 0;
To fix this possible data race, a lock and unlock pair is added when
accessing the variable phba->fcf.fcf_flag.
Reported-by: BassCheck <bass(a)buaa.edu.cn>
Signed-off-by: Tuo Li <islituo(a)gmail.com>
Link: https://lore.kernel.org/r/20230630024748.1035993-1-islituo@gmail.com
Reviewed-by: Justin Tee <justin.tee(a)broadcom.com>
Reviewed-by: Laurence Oberman <loberman(a)redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/scsi/lpfc/lpfc_hbadisc.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 4bb0a15cfcc01..54aff304cdcf4 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -6954,7 +6954,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
if (rc)
return;
/* Reset HBA FCF states after successful unregister FCF */
+ spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
phba->fcf.current_rec.flag = 0;
/*
--
2.40.1
From: Tuo Li <islituo(a)gmail.com>
[ Upstream commit 0e881c0a4b6146b7e856735226208f48251facd8 ]
The variable phba->fcf.fcf_flag is often protected by the lock
phba->hbalock() when is accessed. Here is an example in
lpfc_unregister_fcf_rescan():
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag |= FCF_INIT_DISC;
spin_unlock_irq(&phba->hbalock);
However, in the same function, phba->fcf.fcf_flag is assigned with 0
without holding the lock, and thus can cause a data race:
phba->fcf.fcf_flag = 0;
To fix this possible data race, a lock and unlock pair is added when
accessing the variable phba->fcf.fcf_flag.
Reported-by: BassCheck <bass(a)buaa.edu.cn>
Signed-off-by: Tuo Li <islituo(a)gmail.com>
Link: https://lore.kernel.org/r/20230630024748.1035993-1-islituo@gmail.com
Reviewed-by: Justin Tee <justin.tee(a)broadcom.com>
Reviewed-by: Laurence Oberman <loberman(a)redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/scsi/lpfc/lpfc_hbadisc.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index d38ebd7281b9b..326ef4942e672 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -6960,7 +6960,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
if (rc)
return;
/* Reset HBA FCF states after successful unregister FCF */
+ spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
phba->fcf.current_rec.flag = 0;
/*
--
2.40.1
From: Boris Brezillon <boris.brezillon(a)collabora.com>
[ Upstream commit e30cb0599799aac099209e3b045379613c80730e ]
drm_sched_entity_kill_jobs_cb() logic is omitting the last fence popped
from the dependency array that was waited upon before
drm_sched_entity_kill() was called (drm_sched_entity::dependency field),
so we're basically waiting for all dependencies except one.
In theory, this wait shouldn't be needed because resources should have
their users registered to the dma_resv object, thus guaranteeing that
future jobs wanting to access these resources wait on all the previous
users (depending on the access type, of course). But we want to keep
these explicit waits in the kill entity path just in case.
Let's make sure we keep all dependencies in the array in
drm_sched_job_dependency(), so we can iterate over the array and wait
in drm_sched_entity_kill_jobs_cb().
We also make sure we wait on drm_sched_fence::finished if we were
originally asked to wait on drm_sched_fence::scheduled. In that case,
we assume the intent was to delegate the wait to the firmware/GPU or
rely on the pipelining done at the entity/scheduler level, but when
killing jobs, we really want to wait for completion not just scheduling.
v2:
- Don't evict deps in drm_sched_job_dependency()
v3:
- Always wait for drm_sched_fence::finished fences in
drm_sched_entity_kill_jobs_cb() when we see a sched_fence
v4:
- Fix commit message
- Fix a use-after-free bug
v5:
- Flag deps on which we should only wait for the scheduled event
at insertion time
v6:
- Back to v4 implementation
- Add Christian's R-b
Cc: Frank Binns <frank.binns(a)imgtec.com>
Cc: Sarah Walker <sarah.walker(a)imgtec.com>
Cc: Donald Robson <donald.robson(a)imgtec.com>
Cc: Luben Tuikov <luben.tuikov(a)amd.com>
Cc: David Airlie <airlied(a)gmail.com>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: Sumit Semwal <sumit.semwal(a)linaro.org>
Cc: "Christian König" <christian.koenig(a)amd.com>
Signed-off-by: Boris Brezillon <boris.brezillon(a)collabora.com>
Suggested-by: "Christian König" <christian.koenig(a)amd.com>
Reviewed-by: "Christian König" <christian.koenig(a)amd.com>
Acked-by: Luben Tuikov <luben.tuikov(a)amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230619071921.3465992-1-bori…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/scheduler/sched_entity.c | 41 +++++++++++++++++++-----
1 file changed, 33 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index e0a8890a62e23..42021d1f7e016 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -155,16 +155,32 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
{
struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
finish_cb);
- int r;
+ unsigned long index;
dma_fence_put(f);
/* Wait for all dependencies to avoid data corruptions */
- while (!xa_empty(&job->dependencies)) {
- f = xa_erase(&job->dependencies, job->last_dependency++);
- r = dma_fence_add_callback(f, &job->finish_cb,
- drm_sched_entity_kill_jobs_cb);
- if (!r)
+ xa_for_each(&job->dependencies, index, f) {
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+
+ if (s_fence && f == &s_fence->scheduled) {
+ /* The dependencies array had a reference on the scheduled
+ * fence, and the finished fence refcount might have
+ * dropped to zero. Use dma_fence_get_rcu() so we get
+ * a NULL fence in that case.
+ */
+ f = dma_fence_get_rcu(&s_fence->finished);
+
+ /* Now that we have a reference on the finished fence,
+ * we can release the reference the dependencies array
+ * had on the scheduled fence.
+ */
+ dma_fence_put(&s_fence->scheduled);
+ }
+
+ xa_erase(&job->dependencies, index);
+ if (f && !dma_fence_add_callback(f, &job->finish_cb,
+ drm_sched_entity_kill_jobs_cb))
return;
dma_fence_put(f);
@@ -394,8 +410,17 @@ static struct dma_fence *
drm_sched_job_dependency(struct drm_sched_job *job,
struct drm_sched_entity *entity)
{
- if (!xa_empty(&job->dependencies))
- return xa_erase(&job->dependencies, job->last_dependency++);
+ struct dma_fence *f;
+
+ /* We keep the fence around, so we can iterate over all dependencies
+ * in drm_sched_entity_kill_jobs_cb() to ensure all deps are signaled
+ * before killing the job.
+ */
+ f = xa_load(&job->dependencies, job->last_dependency);
+ if (f) {
+ job->last_dependency++;
+ return dma_fence_get(f);
+ }
if (job->sched->ops->prepare_job)
return job->sched->ops->prepare_job(job, entity);
--
2.40.1