From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit 42f86b1226a42bfc79a7125af435432ad4680a32 ]
In some cases xattr is too fragmented, so we need to load it before writing.
Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/xattr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 0968565ff2ca..fa9f42e50503 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -118,7 +118,7 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea,
run_init(&run);
- err = attr_load_runs(attr_ea, ni, &run, NULL); + err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &run, 0, size); if (!err) err = ntfs_read_run_nb(sbi, &run, 0, ea_p, size, NULL); run_close(&run); @@ -443,6 +443,11 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name, /* Delete xattr, ATTR_EA */ ni_remove_attr_le(ni, attr, mi, le); } else if (attr->non_res) { + err = attr_load_runs_range(ni, ATTR_EA, NULL, 0, &ea_run, 0, + size); + if (err) + goto out; + err = ntfs_sb_write_run(sbi, &ea_run, 0, ea_all, size, 0); if (err) goto out;
From: Biju Das biju.das.jz@bp.renesas.com
[ Upstream commit c75ed9f54ce8d349fee557f2b471a4d637ed2a6b ]
We usually do cleanup in reverse order of init. Currently in case of error rz_ssi_release_dma_channels() done in the reverse order. This patch improves error handling in rz_ssi_probe() error path.
While at it, use "goto cleanup" style to reduce code duplication.
Reported-by: Pavel Machek pavel@denx.de Signed-off-by: Biju Das biju.das.jz@bp.renesas.com Link: https://lore.kernel.org/r/20220728092612.38858-1-biju.das.jz@bp.renesas.com Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- sound/soc/sh/rz-ssi.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/sound/soc/sh/rz-ssi.c b/sound/soc/sh/rz-ssi.c index 6d794eaaf4c3..2e33a1fa0a6f 100644 --- a/sound/soc/sh/rz-ssi.c +++ b/sound/soc/sh/rz-ssi.c @@ -1022,32 +1022,36 @@ static int rz_ssi_probe(struct platform_device *pdev)
ssi->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); if (IS_ERR(ssi->rstc)) { - rz_ssi_release_dma_channels(ssi); - return PTR_ERR(ssi->rstc); + ret = PTR_ERR(ssi->rstc); + goto err_reset; }
reset_control_deassert(ssi->rstc); pm_runtime_enable(&pdev->dev); ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { - rz_ssi_release_dma_channels(ssi); - pm_runtime_disable(ssi->dev); - reset_control_assert(ssi->rstc); - return dev_err_probe(ssi->dev, ret, "pm_runtime_resume_and_get failed\n"); + dev_err(&pdev->dev, "pm_runtime_resume_and_get failed\n"); + goto err_pm; }
ret = devm_snd_soc_register_component(&pdev->dev, &rz_ssi_soc_component, rz_ssi_soc_dai, ARRAY_SIZE(rz_ssi_soc_dai)); if (ret < 0) { - rz_ssi_release_dma_channels(ssi); - - pm_runtime_put(ssi->dev); - pm_runtime_disable(ssi->dev); - reset_control_assert(ssi->rstc); dev_err(&pdev->dev, "failed to register snd component\n"); + goto err_snd_soc; }
+ return 0; + +err_snd_soc: + pm_runtime_put(ssi->dev); +err_pm: + pm_runtime_disable(ssi->dev); + reset_control_assert(ssi->rstc); +err_reset: + rz_ssi_release_dma_channels(ssi); + return ret; }
From: Josip Pavic Josip.Pavic@amd.com
[ Upstream commit 8de297dc046c180651c0500f8611663ae1c3828a ]
[why] In some cases MPC tree bottom pipe ends up point to itself. This causes iterating from top to bottom to hang the system in an infinite loop.
[how] When looping to next MPC bottom pipe, check that the pointer is not same as current to avoid infinite loop.
Reviewed-by: Josip Pavic Josip.Pavic@amd.com Reviewed-by: Jun Lei Jun.Lei@amd.com Acked-by: Alex Hung alex.hung@amd.com Signed-off-by: Aric Cyr aric.cyr@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c | 6 ++++++ 2 files changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 11019c2c62cc..8192f1967e92 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -126,6 +126,12 @@ struct mpcc *mpc1_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) while (tmp_mpcc != NULL) { if (tmp_mpcc->dpp_id == dpp_id) return tmp_mpcc; + + /* avoid circular linked list */ + ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot); + if (tmp_mpcc == tmp_mpcc->mpcc_bot) + break; + tmp_mpcc = tmp_mpcc->mpcc_bot; } return NULL; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index 947eb0df3f12..142fc0a3a536 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -532,6 +532,12 @@ struct mpcc *mpc2_get_mpcc_for_dpp(struct mpc_tree *tree, int dpp_id) while (tmp_mpcc != NULL) { if (tmp_mpcc->dpp_id == 0xf || tmp_mpcc->dpp_id == dpp_id) return tmp_mpcc; + + /* avoid circular linked list */ + ASSERT(tmp_mpcc != tmp_mpcc->mpcc_bot); + if (tmp_mpcc == tmp_mpcc->mpcc_bot) + break; + tmp_mpcc = tmp_mpcc->mpcc_bot; } return NULL;
From: Leo Ma hanghong.ma@amd.com
[ Upstream commit 0591183699fceeafb4c4141072d47775de83ecfb ]
[Why] Reported from customer the checksum in AMD VSIF V3 is incorrect and causing blank screen issue.
[How] Fix the packet length issue on AMD HDMI VSIF V3.
Reviewed-by: Anthony Koo Anthony.Koo@amd.com Acked-by: Tom Chung chiahsuan.chung@amd.com Signed-off-by: Leo Ma hanghong.ma@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- .../drm/amd/display/modules/freesync/freesync.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index b99aa232bd8b..4bee6d018bfa 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -567,10 +567,6 @@ static void build_vrr_infopacket_data_v1(const struct mod_vrr_params *vrr, * Note: We should never go above the field rate of the mode timing set. */ infopacket->sb[8] = (unsigned char)((vrr->max_refresh_in_uhz + 500000) / 1000000); - - /* FreeSync HDR */ - infopacket->sb[9] = 0; - infopacket->sb[10] = 0; }
static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr, @@ -638,10 +634,6 @@ static void build_vrr_infopacket_data_v3(const struct mod_vrr_params *vrr,
/* PB16 : Reserved bits 7:1, FixedRate bit 0 */ infopacket->sb[16] = (vrr->state == VRR_STATE_ACTIVE_FIXED) ? 1 : 0; - - //FreeSync HDR - infopacket->sb[9] = 0; - infopacket->sb[10] = 0; }
static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, @@ -726,8 +718,7 @@ static void build_vrr_infopacket_header_v2(enum signal_type signal, /* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length = 0x09] */ infopacket->hb2 = 0x09;
- *payload_size = 0x0A; - + *payload_size = 0x09; } else if (dc_is_dp_signal(signal)) {
/* HEADER */ @@ -776,9 +767,9 @@ static void build_vrr_infopacket_header_v3(enum signal_type signal, infopacket->hb1 = version;
/* HB2 = [Bits 7:5 = 0] [Bits 4:0 = Length] */ - *payload_size = 0x10; - infopacket->hb2 = *payload_size - 1; //-1 for checksum + infopacket->hb2 = 0x10;
+ *payload_size = 0x10; } else if (dc_is_dp_signal(signal)) {
/* HEADER */
From: Alvin Lee alvin.lee2@amd.com
[ Upstream commit 84ef99c728079dfd21d6bc70b4c3e4af20602b3c ]
[Description] Observed in stereomode that programming FLIP_LEFT_EYE can cause hangs. Keep FLIP_ANY_FRAME in stereo mode so the surface flip can take place before left or right eye
Reviewed-by: Martin Leung Martin.Leung@amd.com Acked-by: Tom Chung chiahsuan.chung@amd.com Signed-off-by: Alvin Lee alvin.lee2@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index f24612523248..33c2337c4edf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -86,7 +86,7 @@ bool hubp3_program_surface_flip_and_addr( VMID, address->vmid);
if (address->type == PLN_ADDR_TYPE_GRPH_STEREO) { - REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0x1); + REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_MODE_FOR_STEREOSYNC, 0); REG_UPDATE(DCSURF_FLIP_CONTROL, SURFACE_FLIP_IN_STEREOSYNC, 0x1);
} else {
From: Fudong Wang Fudong.Wang@amd.com
[ Upstream commit b2a93490201300a749ad261b5c5d05cb50179c44 ]
[Why] After ODM clock off, optc underflow bit will be kept there always and clear not work. We need to clear that before clock off.
[How] Clear that if have when clock off.
Reviewed-by: Alvin Lee alvin.lee2@amd.com Acked-by: Tom Chung chiahsuan.chung@amd.com Signed-off-by: Fudong Wang Fudong.Wang@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 37848f4577b1..92fee47278e5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -480,6 +480,11 @@ void optc1_enable_optc_clock(struct timing_generator *optc, bool enable) OTG_CLOCK_ON, 1, 1, 1000); } else { + + //last chance to clear underflow, otherwise, it will always there due to clock is off. + if (optc->funcs->is_optc_underflow_occurred(optc) == true) + optc->funcs->clear_optc_underflow(optc); + REG_UPDATE_2(OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, 0, OTG_CLOCK_EN, 0);
From: Namjae Jeon linkinjeon@kernel.org
[ Upstream commit fe54833dc8d97ef387e86f7c80537d51c503ca75 ]
If share is not configured in smb.conf, smb2 tree connect should return STATUS_BAD_NETWORK_NAME instead of STATUS_BAD_NETWORK_PATH.
Signed-off-by: Namjae Jeon linkinjeon@kernel.org Reviewed-by: Hyunchul Lee hyc.lee@gmail.com Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ksmbd/mgmt/tree_connect.c | 2 +- fs/ksmbd/smb2pdu.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/fs/ksmbd/mgmt/tree_connect.c b/fs/ksmbd/mgmt/tree_connect.c index 0d28e723a28c..940385c6a913 100644 --- a/fs/ksmbd/mgmt/tree_connect.c +++ b/fs/ksmbd/mgmt/tree_connect.c @@ -18,7 +18,7 @@ struct ksmbd_tree_conn_status ksmbd_tree_conn_connect(struct ksmbd_session *sess, char *share_name) { - struct ksmbd_tree_conn_status status = {-EINVAL, NULL}; + struct ksmbd_tree_conn_status status = {-ENOENT, NULL}; struct ksmbd_tree_connect_response *resp = NULL; struct ksmbd_share_config *sc; struct ksmbd_tree_connect *tree_conn = NULL; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 28b5d20c8766..824f17a101a9 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -1932,8 +1932,9 @@ int smb2_tree_connect(struct ksmbd_work *work) rsp->hdr.Status = STATUS_SUCCESS; rc = 0; break; + case -ENOENT: case KSMBD_TREE_CONN_STATUS_NO_SHARE: - rsp->hdr.Status = STATUS_BAD_NETWORK_PATH; + rsp->hdr.Status = STATUS_BAD_NETWORK_NAME; break; case -ENOMEM: case KSMBD_TREE_CONN_STATUS_NOMEM:
From: "Denis V. Lunev" den@openvz.org
[ Upstream commit 66ba215cb51323e4e55e38fd5f250e0fae0cbc94 ]
Normal processing of ARP request (usually this is Ethernet broadcast packet) coming to the host is looking like the following: * the packet comes to arp_process() call and is passed through routing procedure * the request is put into the queue using pneigh_enqueue() if corresponding ARP record is not local (common case for container records on the host) * the request is processed by timer (within 80 jiffies by default) and ARP reply is sent from the same arp_process() using NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED condition (flag is set inside pneigh_enqueue())
And here the problem comes. Linux kernel calls pneigh_queue_purge() which destroys the whole queue of ARP requests on ANY network interface start/stop event through __neigh_ifdown().
This is actually not a problem within the original world as network interface start/stop was accessible to the host 'root' only, which could do more destructive things. But the world is changed and there are Linux containers available. Here container 'root' has an access to this API and could be considered as untrusted user in the hosting (container's) world.
Thus there is an attack vector to other containers on node when container's root will endlessly start/stop interfaces. We have observed similar situation on a real production node when docker container was doing such activity and thus other containers on the node become not accessible.
The patch proposed doing very simple thing. It drops only packets from the same namespace in the pneigh_queue_purge() where network interface state change is detected. This is enough to prevent the problem for the whole node preserving original semantics of the code.
v2: - do del_timer_sync() if queue is empty after pneigh_queue_purge() v3: - rebase to net tree
Cc: "David S. Miller" davem@davemloft.net Cc: Eric Dumazet edumazet@google.com Cc: Jakub Kicinski kuba@kernel.org Cc: Paolo Abeni pabeni@redhat.com Cc: Daniel Borkmann daniel@iogearbox.net Cc: David Ahern dsahern@kernel.org Cc: Yajun Deng yajun.deng@linux.dev Cc: Roopa Prabhu roopa@nvidia.com Cc: Christian Brauner brauner@kernel.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Alexey Kuznetsov kuznet@ms2.inr.ac.ru Cc: Alexander Mikhalitsyn alexander.mikhalitsyn@virtuozzo.com Cc: Konstantin Khorenko khorenko@virtuozzo.com Cc: kernel@openvz.org Cc: devel@openvz.org Investigated-by: Alexander Mikhalitsyn alexander.mikhalitsyn@virtuozzo.com Signed-off-by: Denis V. Lunev den@openvz.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org --- net/core/neighbour.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ff049733ccee..f0be42c140b9 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -279,14 +279,23 @@ static int neigh_del_timer(struct neighbour *n) return 0; }
-static void pneigh_queue_purge(struct sk_buff_head *list) +static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) { + unsigned long flags; struct sk_buff *skb;
- while ((skb = skb_dequeue(list)) != NULL) { - dev_put(skb->dev); - kfree_skb(skb); + spin_lock_irqsave(&list->lock, flags); + skb = skb_peek(list); + while (skb != NULL) { + struct sk_buff *skb_next = skb_peek_next(skb, list); + if (net == NULL || net_eq(dev_net(skb->dev), net)) { + __skb_unlink(skb, list); + dev_put(skb->dev); + kfree_skb(skb); + } + skb = skb_next; } + spin_unlock_irqrestore(&list->lock, flags); }
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, @@ -357,9 +366,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - - del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue); + pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev)); + if (skb_queue_empty_lockless(&tbl->proxy_queue)) + del_timer_sync(&tbl->proxy_timer); return 0; }
@@ -1735,7 +1744,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) /* It is not clean... Fix it to unload IPv6 module safely */ cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue); + pneigh_queue_purge(&tbl->proxy_queue, NULL); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n");
From: Juergen Gross jgross@suse.com
[ Upstream commit 7b6670b03641ac308aaa6fa2e6f964ac993b5ea3 ]
When booting under KVM the following error messages are issued:
hypfs.7f5705: The hardware system does not support hypfs hypfs.7a79f0: Initialization of hypfs failed with rc=-61
Demote the severity of first message from "error" to "info" and issue the second message only in other error cases.
Signed-off-by: Juergen Gross jgross@suse.com Acked-by: Heiko Carstens hca@linux.ibm.com Acked-by: Christian Borntraeger borntraeger@linux.ibm.com Link: https://lore.kernel.org/r/20220620094534.18967-1-jgross@suse.com [arch/s390/hypfs/hypfs_diag.c changed description] Signed-off-by: Alexander Gordeev agordeev@linux.ibm.com Signed-off-by: Sasha Levin sashal@kernel.org --- arch/s390/hypfs/hypfs_diag.c | 2 +- arch/s390/hypfs/inode.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index f0bc4dc3e9bf..6511d15ace45 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -437,7 +437,7 @@ __init int hypfs_diag_init(void) int rc;
if (diag204_probe()) { - pr_err("The hardware system does not support hypfs\n"); + pr_info("The hardware system does not support hypfs\n"); return -ENODATA; }
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 5c97f48cea91..ee919bfc8186 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -496,9 +496,9 @@ static int __init hypfs_init(void) hypfs_vm_exit(); fail_hypfs_diag_exit: hypfs_diag_exit(); + pr_err("Initialization of hypfs failed with rc=%i\n", rc); fail_dbfs_exit: hypfs_dbfs_exit(); - pr_err("Initialization of hypfs failed with rc=%i\n", rc); return rc; } device_initcall(hypfs_init)
From: Namjae Jeon linkinjeon@kernel.org
[ Upstream commit 17661ecf6a64eb11ae7f1108fe88686388b2acd5 ]
When smb client open file in ksmbd share with O_TRUNC, dos attribute xattr is removed as well as data in file. This cause the FSCTL_SET_SPARSE request from the client fails because ksmbd can't update the dos attribute after setting ATTR_SPARSE_FILE. And this patch fix xfstests generic/469 test also.
Signed-off-by: Namjae Jeon linkinjeon@kernel.org Reviewed-by: Hyunchul Lee hyc.lee@gmail.com Signed-off-by: Steve French stfrench@microsoft.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ksmbd/smb2pdu.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 824f17a101a9..55ee639703ff 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -2319,15 +2319,15 @@ static int smb2_remove_smb_xattrs(struct path *path) name += strlen(name) + 1) { ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
- if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && - strncmp(&name[XATTR_USER_PREFIX_LEN], DOS_ATTRIBUTE_PREFIX, - DOS_ATTRIBUTE_PREFIX_LEN) && - strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) - continue; - - err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, name); - if (err) - ksmbd_debug(SMB, "remove xattr failed : %s\n", name); + if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && + !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, + STREAM_PREFIX_LEN)) { + err = ksmbd_vfs_remove_xattr(user_ns, path->dentry, + name); + if (err) + ksmbd_debug(SMB, "remove xattr failed : %s\n", + name); + } } out: kvfree(xattr_list);
From: Evan Quan evan.quan@amd.com
[ Upstream commit 0a2d922a5618377cdf8fa476351362733ef55342 ]
To avoid any potential memory leak.
Signed-off-by: Evan Quan evan.quan@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 918d5c7c2328..79976921dc46 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -3915,6 +3915,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .dump_pptable = sienna_cichlid_dump_pptable, .init_microcode = smu_v11_0_init_microcode, .load_microcode = smu_v11_0_load_microcode, + .fini_microcode = smu_v11_0_fini_microcode, .init_smc_tables = sienna_cichlid_init_smc_tables, .fini_smc_tables = smu_v11_0_fini_smc_tables, .init_power = smu_v11_0_init_power,
From: Ilya Bakoulin Ilya.Bakoulin@amd.com
[ Upstream commit 04fb918bf421b299feaee1006e82921d7d381f18 ]
[Why] Some pixel clock values could cause HDMI TMDS SSCPs to be misaligned between different HDMI lanes when using YCbCr420 10-bit pixel format.
BIOS functions for transmitter/encoder control take pixel clock in kHz increments, whereas the function for setting the pixel clock is in 100Hz increments. Setting pixel clock to a value that is not on a kHz boundary will cause the issue.
[How] Round pixel clock down to nearest kHz in 10/12-bpc cases.
Reviewed-by: Aric Cyr Aric.Cyr@amd.com Acked-by: Brian Chang Brian.Chang@amd.com Signed-off-by: Ilya Bakoulin Ilya.Bakoulin@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index 054823d12403..5f1b735da506 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -545,9 +545,11 @@ static void dce112_get_pix_clk_dividers_helper ( switch (pix_clk_params->color_depth) { case COLOR_DEPTH_101010: actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 5) >> 2; + actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; break; case COLOR_DEPTH_121212: actual_pixel_clock_100hz = (actual_pixel_clock_100hz * 6) >> 2; + actual_pixel_clock_100hz -= actual_pixel_clock_100hz % 10; break; case COLOR_DEPTH_161616: actual_pixel_clock_100hz = actual_pixel_clock_100hz * 2;
From: Dusica Milinkovic Dusica.Milinkovic@amd.com
[ Upstream commit 373008bfc9cdb0f050258947fa5a095f0657e1bc ]
[Why] During multi-vf executing benchmark (Luxmark) observed kiq error timeout. It happenes because all of VFs do the tlb invalidation at the same time. Although each VF has the invalidate register set, from hardware side the invalidate requests are queue to execute.
[How] In case of 12 VF increase timeout on 12*100ms
Signed-off-by: Dusica Milinkovic Dusica.Milinkovic@amd.com Acked-by: Shaoyun Liu shaoyun.liu@amd.com Acked-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5f95d03fd46a..4f62f422bcb7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -312,7 +312,7 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0, AMDGPU_CP_KIQ_IRQ_LAST }; - +#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */ #define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ #define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ #define MAX_KIQ_REG_TRY 1000 diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 93a4da4284ed..9c07ec8b9732 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -414,6 +414,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -432,7 +433,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); return -ETIME; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 0e731016921b..70d24b522df8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -863,6 +863,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint32_t seq; uint16_t queried_pasid; bool ret; + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -902,7 +903,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); up_read(&adev->reset_sem);
From: Charlene Liu Charlene.Liu@amd.com
[ Upstream commit 5544a7b5a07480192eb5fd3536462faed2c21528 ]
[why] this is to ensure that driver will not reprogram hvm_prefetch_req again if it is done.
Reviewed-by: Martin Leung Martin.Leung@amd.com Acked-by: Brian Chang Brian.Chang@amd.com Signed-off-by: Charlene Liu Charlene.Liu@amd.com Tested-by: Daniel Wheeler daniel.wheeler@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c index 36044cb8ec83..1c0f56d8ba8b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c @@ -67,9 +67,15 @@ static uint32_t convert_and_clamp( void dcn21_dchvm_init(struct hubbub *hubbub) { struct dcn20_hubbub *hubbub1 = TO_DCN20_HUBBUB(hubbub); - uint32_t riommu_active; + uint32_t riommu_active, prefetch_done; int i;
+ REG_GET(DCHVM_RIOMMU_STAT0, HOSTVM_PREFETCH_DONE, &prefetch_done); + + if (prefetch_done) { + hubbub->riommu_active = true; + return; + } //Init DCHVM block REG_UPDATE(DCHVM_CTRL0, HOSTVM_INIT_REQ, 1);
From: Geert Uytterhoeven geert@linux-m68k.org
[ Upstream commit aa5762c34213aba7a72dc58e70601370805fa794 ]
NF_CONNTRACK_PROCFS was marked obsolete in commit 54b07dca68557b09 ("netfilter: provide config option to disable ancient procfs parts") in v3.3.
Signed-off-by: Geert Uytterhoeven geert@linux-m68k.org Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/Kconfig | 1 - 1 file changed, 1 deletion(-)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 92a747896f80..4f645d51c257 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -133,7 +133,6 @@ config NF_CONNTRACK_ZONES
config NF_CONNTRACK_PROCFS bool "Supply CT list in procfs (OBSOLETE)" - default y depends on PROC_FS help This option enables for the list of known conntrack entries
From: Florian Westphal fw@strlen.de
[ Upstream commit b71b7bfeac38c7a21c423ddafb29aa6258949df8 ]
"ns1" is a too generic name, use a random suffix to avoid errors when such a netns exists. Also allows to run multiple instances of the script in parallel.
Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Sasha Levin sashal@kernel.org --- .../selftests/netfilter/nft_flowtable.sh | 246 +++++++++--------- 1 file changed, 128 insertions(+), 118 deletions(-)
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index d4ffebb989f8..c336e6c148d1 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -14,6 +14,11 @@ # nft_flowtable.sh -o8000 -l1500 -r2000 #
+sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +nsr1="nsr1-$sfx" +nsr2="nsr2-$sfx"
# Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -36,18 +41,17 @@ checktool (){ checktool "nft --version" "run test without nft tool" checktool "ip -Version" "run test without ip tool" checktool "which nc" "run test without nc (netcat)" -checktool "ip netns add nsr1" "create net namespace" +checktool "ip netns add $nsr1" "create net namespace $nsr1"
-ip netns add ns1 -ip netns add ns2 - -ip netns add nsr2 +ip netns add $ns1 +ip netns add $ns2 +ip netns add $nsr2
cleanup() { - for i in 1 2; do - ip netns del ns$i - ip netns del nsr$i - done + ip netns del $ns1 + ip netns del $ns2 + ip netns del $nsr1 + ip netns del $nsr2
rm -f "$ns1in" "$ns1out" rm -f "$ns2in" "$ns2out" @@ -59,22 +63,21 @@ trap cleanup EXIT
sysctl -q net.netfilter.nf_log_all_netns=1
-ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1 -ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2 +ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1 +ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2
-ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2 +ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2
for dev in lo veth0 veth1; do - for i in 1 2; do - ip -net nsr$i link set $dev up - done + ip -net $nsr1 link set $dev up + ip -net $nsr2 link set $dev up done
-ip -net nsr1 addr add 10.0.1.1/24 dev veth0 -ip -net nsr1 addr add dead:1::1/64 dev veth0 +ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 +ip -net $nsr1 addr add dead:1::1/64 dev veth0
-ip -net nsr2 addr add 10.0.2.1/24 dev veth1 -ip -net nsr2 addr add dead:2::1/64 dev veth1 +ip -net $nsr2 addr add 10.0.2.1/24 dev veth1 +ip -net $nsr2 addr add dead:2::1/64 dev veth1
# set different MTUs so we need to push packets coming from ns1 (large MTU) # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1), @@ -106,49 +109,56 @@ do esac done
-if ! ip -net nsr1 link set veth0 mtu $omtu; then +if ! ip -net $nsr1 link set veth0 mtu $omtu; then exit 1 fi
-ip -net ns1 link set eth0 mtu $omtu +ip -net $ns1 link set eth0 mtu $omtu
-if ! ip -net nsr2 link set veth1 mtu $rmtu; then +if ! ip -net $nsr2 link set veth1 mtu $rmtu; then exit 1 fi
-ip -net ns2 link set eth0 mtu $rmtu +ip -net $ns2 link set eth0 mtu $rmtu
# transfer-net between nsr1 and nsr2. # these addresses are not used for connections. -ip -net nsr1 addr add 192.168.10.1/24 dev veth1 -ip -net nsr1 addr add fee1:2::1/64 dev veth1 - -ip -net nsr2 addr add 192.168.10.2/24 dev veth0 -ip -net nsr2 addr add fee1:2::2/64 dev veth0 - -for i in 1 2; do - ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null - ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null - - ip -net ns$i link set lo up - ip -net ns$i link set eth0 up - ip -net ns$i addr add 10.0.$i.99/24 dev eth0 - ip -net ns$i route add default via 10.0.$i.1 - ip -net ns$i addr add dead:$i::99/64 dev eth0 - ip -net ns$i route add default via dead:$i::1 - if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then +ip -net $nsr1 addr add 192.168.10.1/24 dev veth1 +ip -net $nsr1 addr add fee1:2::1/64 dev veth1 + +ip -net $nsr2 addr add 192.168.10.2/24 dev veth0 +ip -net $nsr2 addr add fee1:2::2/64 dev veth0 + +for i in 0 1; do + ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null + ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null +done + +for ns in $ns1 $ns2;do + ip -net $ns link set lo up + ip -net $ns link set eth0 up + + if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then echo "ERROR: Check Originator/Responder values (problem during address addition)" exit 1 fi - # don't set ip DF bit for first two tests - ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null + ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null done
-ip -net nsr1 route add default via 192.168.10.2 -ip -net nsr2 route add default via 192.168.10.1 +ip -net $ns1 addr add 10.0.1.99/24 dev eth0 +ip -net $ns2 addr add 10.0.2.99/24 dev eth0 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns2 route add default via 10.0.2.1 +ip -net $ns1 addr add dead:1::99/64 dev eth0 +ip -net $ns2 addr add dead:2::99/64 dev eth0 +ip -net $ns1 route add default via dead:1::1 +ip -net $ns2 route add default via dead:2::1 + +ip -net $nsr1 route add default via 192.168.10.2 +ip -net $nsr2 route add default via 192.168.10.1
-ip netns exec nsr1 nft -f - <<EOF +ip netns exec $nsr1 nft -f - <<EOF table inet filter { flowtable f1 { hook ingress priority 0 @@ -197,18 +207,18 @@ if [ $? -ne 0 ]; then fi
# test basic connectivity -if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then - echo "ERROR: ns1 cannot reach ns2" 1>&2 +if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then + echo "ERROR: $ns1 cannot reach ns2" 1>&2 exit 1 fi
-if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then - echo "ERROR: ns2 cannot reach ns1" 1>&2 +if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then + echo "ERROR: $ns2 cannot reach $ns1" 1>&2 exit 1 fi
if [ $ret -eq 0 ];then - echo "PASS: netns routing/connectivity: ns1 can reach ns2" + echo "PASS: netns routing/connectivity: $ns1 can reach $ns2" fi
ns1in=$(mktemp) @@ -312,24 +322,24 @@ make_file "$ns2in"
# First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. -if test_tcp_forwarding ns1 ns2; then +if test_tcp_forwarding $ns1 $ns2; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset ret=1 fi
# delete default route, i.e. ns2 won't be able to reach ns1 and # will depend on ns1 being masqueraded in nsr1. # expect ns1 has nsr1 address. -ip -net ns2 route del default via 10.0.2.1 -ip -net ns2 route del default via dead:2::1 -ip -net ns2 route add 192.168.10.1 via 10.0.2.1 +ip -net $ns2 route del default via 10.0.2.1 +ip -net $ns2 route del default via dead:2::1 +ip -net $ns2 route add 192.168.10.1 via 10.0.2.1
# Second test: # Same, but with NAT enabled. -ip netns exec nsr1 nft -f - <<EOF +ip netns exec $nsr1 nft -f - <<EOF table ip nat { chain prerouting { type nat hook prerouting priority 0; policy accept; @@ -343,47 +353,47 @@ table ip nat { } EOF
-if test_tcp_forwarding_nat ns1 ns2; then +if test_tcp_forwarding_nat $ns1 $ns2; then echo "PASS: flow offloaded for ns1/ns2 with NAT" else echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset ret=1 fi
# Third test: # Same as second test, but with PMTU discovery enabled. -handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d # -f 2) +handle=$(ip netns exec $nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d # -f 2)
-if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then +if ! ip netns exec $nsr1 nft delete rule inet filter forward $handle; then echo "FAIL: Could not delete large-packet accept rule" exit 1 fi
-ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null -ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null +ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null +ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-if test_tcp_forwarding_nat ns1 ns2; then +if test_tcp_forwarding_nat $ns1 $ns2; then echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" else echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset fi
# Another test: # Add bridge interface br0 to Router1, with NAT enabled. -ip -net nsr1 link add name br0 type bridge -ip -net nsr1 addr flush dev veth0 -ip -net nsr1 link set up dev veth0 -ip -net nsr1 link set veth0 master br0 -ip -net nsr1 addr add 10.0.1.1/24 dev br0 -ip -net nsr1 addr add dead:1::1/64 dev br0 -ip -net nsr1 link set up dev br0 +ip -net $nsr1 link add name br0 type bridge +ip -net $nsr1 addr flush dev veth0 +ip -net $nsr1 link set up dev veth0 +ip -net $nsr1 link set veth0 master br0 +ip -net $nsr1 addr add 10.0.1.1/24 dev br0 +ip -net $nsr1 addr add dead:1::1/64 dev br0 +ip -net $nsr1 link set up dev br0
-ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null +ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
# br0 with NAT enabled. -ip netns exec nsr1 nft -f - <<EOF +ip netns exec $nsr1 nft -f - <<EOF flush table ip nat table ip nat { chain prerouting { @@ -398,59 +408,59 @@ table ip nat { } EOF
-if test_tcp_forwarding_nat ns1 ns2; then +if test_tcp_forwarding_nat $ns1 $ns2; then echo "PASS: flow offloaded for ns1/ns2 with bridge NAT" else echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset ret=1 fi
# Another test: # Add bridge interface br0 to Router1, with NAT and VLAN. -ip -net nsr1 link set veth0 nomaster -ip -net nsr1 link set down dev veth0 -ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10 -ip -net nsr1 link set up dev veth0 -ip -net nsr1 link set up dev veth0.10 -ip -net nsr1 link set veth0.10 master br0 - -ip -net ns1 addr flush dev eth0 -ip -net ns1 link add link eth0 name eth0.10 type vlan id 10 -ip -net ns1 link set eth0 up -ip -net ns1 link set eth0.10 up -ip -net ns1 addr add 10.0.1.99/24 dev eth0.10 -ip -net ns1 route add default via 10.0.1.1 -ip -net ns1 addr add dead:1::99/64 dev eth0.10 - -if test_tcp_forwarding_nat ns1 ns2; then +ip -net $nsr1 link set veth0 nomaster +ip -net $nsr1 link set down dev veth0 +ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10 +ip -net $nsr1 link set up dev veth0 +ip -net $nsr1 link set up dev veth0.10 +ip -net $nsr1 link set veth0.10 master br0 + +ip -net $ns1 addr flush dev eth0 +ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10 +ip -net $ns1 link set eth0 up +ip -net $ns1 link set eth0.10 up +ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns1 addr add dead:1::99/64 dev eth0.10 + +if test_tcp_forwarding_nat $ns1 $ns2; then echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN" else echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2 - ip netns exec nsr1 nft list ruleset + ip netns exec $nsr1 nft list ruleset ret=1 fi
# restore test topology (remove bridge and VLAN) -ip -net nsr1 link set veth0 nomaster -ip -net nsr1 link set veth0 down -ip -net nsr1 link set veth0.10 down -ip -net nsr1 link delete veth0.10 type vlan -ip -net nsr1 link delete br0 type bridge -ip -net ns1 addr flush dev eth0.10 -ip -net ns1 link set eth0.10 down -ip -net ns1 link set eth0 down -ip -net ns1 link delete eth0.10 type vlan +ip -net $nsr1 link set veth0 nomaster +ip -net $nsr1 link set veth0 down +ip -net $nsr1 link set veth0.10 down +ip -net $nsr1 link delete veth0.10 type vlan +ip -net $nsr1 link delete br0 type bridge +ip -net $ns1 addr flush dev eth0.10 +ip -net $ns1 link set eth0.10 down +ip -net $ns1 link set eth0 down +ip -net $ns1 link delete eth0.10 type vlan
# restore address in ns1 and nsr1 -ip -net ns1 link set eth0 up -ip -net ns1 addr add 10.0.1.99/24 dev eth0 -ip -net ns1 route add default via 10.0.1.1 -ip -net ns1 addr add dead:1::99/64 dev eth0 -ip -net ns1 route add default via dead:1::1 -ip -net nsr1 addr add 10.0.1.1/24 dev veth0 -ip -net nsr1 addr add dead:1::1/64 dev veth0 -ip -net nsr1 link set up dev veth0 +ip -net $ns1 link set eth0 up +ip -net $ns1 addr add 10.0.1.99/24 dev eth0 +ip -net $ns1 route add default via 10.0.1.1 +ip -net $ns1 addr add dead:1::99/64 dev eth0 +ip -net $ns1 route add default via dead:1::1 +ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 +ip -net $nsr1 addr add dead:1::1/64 dev veth0 +ip -net $nsr1 link set up dev veth0
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1) KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1) @@ -480,23 +490,23 @@ do_esp() {
}
-do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
-do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
-ip netns exec nsr1 nft delete table ip nat +ip netns exec $nsr1 nft delete table ip nat
# restore default routes -ip -net ns2 route del 192.168.10.1 via 10.0.2.1 -ip -net ns2 route add default via 10.0.2.1 -ip -net ns2 route add default via dead:2::1 +ip -net $ns2 route del 192.168.10.1 via 10.0.2.1 +ip -net $ns2 route add default via 10.0.2.1 +ip -net $ns2 route add default via dead:2::1
-if test_tcp_forwarding ns1 ns2; then +if test_tcp_forwarding $ns1 $ns2; then echo "PASS: ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" - ip netns exec nsr1 nft list ruleset 1>&2 - ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2 + ip netns exec $nsr1 nft list ruleset 1>&2 + ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2 fi
exit $ret
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit 0a27a0474d146eb79e09ec88bf0d4229f4cfc1b8 ]
These definitions exist in disk-io.c, which is not related to the locking. Move this over to locking.h/c where it makes more sense.
Reviewed-by: Johannes Thumshirn johannes.thumshirn@wdc.com Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/disk-io.c | 82 ---------------------------------------------- fs/btrfs/disk-io.h | 10 ------ fs/btrfs/locking.c | 80 ++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/locking.h | 9 +++++ 4 files changed, 89 insertions(+), 92 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e65c3039caf1..c9edc668b3d9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -121,88 +121,6 @@ struct async_submit_bio { blk_status_t status; };
-/* - * Lockdep class keys for extent_buffer->lock's in this root. For a given - * eb, the lockdep key is determined by the btrfs_root it belongs to and - * the level the eb occupies in the tree. - * - * Different roots are used for different purposes and may nest inside each - * other and they require separate keysets. As lockdep keys should be - * static, assign keysets according to the purpose of the root as indicated - * by btrfs_root->root_key.objectid. This ensures that all special purpose - * roots have separate keysets. - * - * Lock-nesting across peer nodes is always done with the immediate parent - * node locked thus preventing deadlock. As lockdep doesn't know this, use - * subclass to avoid triggering lockdep warning in such cases. - * - * The key is set by the readpage_end_io_hook after the buffer has passed - * csum validation but before the pages are unlocked. It is also set by - * btrfs_init_new_buffer on freshly allocated blocks. - * - * We also add a check to make sure the highest level of the tree is the - * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code - * needs update as well. - */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# if BTRFS_MAX_LEVEL != 8 -# error -# endif - -#define DEFINE_LEVEL(stem, level) \ - .names[level] = "btrfs-" stem "-0" #level, - -#define DEFINE_NAME(stem) \ - DEFINE_LEVEL(stem, 0) \ - DEFINE_LEVEL(stem, 1) \ - DEFINE_LEVEL(stem, 2) \ - DEFINE_LEVEL(stem, 3) \ - DEFINE_LEVEL(stem, 4) \ - DEFINE_LEVEL(stem, 5) \ - DEFINE_LEVEL(stem, 6) \ - DEFINE_LEVEL(stem, 7) - -static struct btrfs_lockdep_keyset { - u64 id; /* root objectid */ - /* Longest entry: btrfs-free-space-00 */ - char names[BTRFS_MAX_LEVEL][20]; - struct lock_class_key keys[BTRFS_MAX_LEVEL]; -} btrfs_lockdep_keysets[] = { - { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") }, - { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") }, - { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") }, - { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") }, - { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") }, - { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") }, - { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") }, - { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") }, - { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") }, - { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") }, - { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") }, - { .id = 0, DEFINE_NAME("tree") }, -}; - -#undef DEFINE_LEVEL -#undef DEFINE_NAME - -void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, - int level) -{ - struct btrfs_lockdep_keyset *ks; - - BUG_ON(level >= ARRAY_SIZE(ks->keys)); - - /* find the matching keyset, id 0 is the default entry */ - for (ks = btrfs_lockdep_keysets; ks->id; ks++) - if (ks->id == objectid) - break; - - lockdep_set_class_and_name(&eb->lock, - &ks->keys[level], ks->names[level]); -} - -#endif - /* * Compute the csum of a btree block and store the result to provided buffer. */ diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 0e7e9526b6a8..1b8fd3deafc9 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -140,14 +140,4 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root); int __init btrfs_end_io_wq_init(void); void __cold btrfs_end_io_wq_exit(void);
-#ifdef CONFIG_DEBUG_LOCK_ALLOC -void btrfs_set_buffer_lockdep_class(u64 objectid, - struct extent_buffer *eb, int level); -#else -static inline void btrfs_set_buffer_lockdep_class(u64 objectid, - struct extent_buffer *eb, int level) -{ -} -#endif - #endif diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 33461b4f9c8b..5747c63929df 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -13,6 +13,86 @@ #include "extent_io.h" #include "locking.h"
+/* + * Lockdep class keys for extent_buffer->lock's in this root. For a given + * eb, the lockdep key is determined by the btrfs_root it belongs to and + * the level the eb occupies in the tree. + * + * Different roots are used for different purposes and may nest inside each + * other and they require separate keysets. As lockdep keys should be + * static, assign keysets according to the purpose of the root as indicated + * by btrfs_root->root_key.objectid. This ensures that all special purpose + * roots have separate keysets. + * + * Lock-nesting across peer nodes is always done with the immediate parent + * node locked thus preventing deadlock. As lockdep doesn't know this, use + * subclass to avoid triggering lockdep warning in such cases. + * + * The key is set by the readpage_end_io_hook after the buffer has passed + * csum validation but before the pages are unlocked. It is also set by + * btrfs_init_new_buffer on freshly allocated blocks. + * + * We also add a check to make sure the highest level of the tree is the + * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code + * needs update as well. + */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if BTRFS_MAX_LEVEL != 8 +#error +#endif + +#define DEFINE_LEVEL(stem, level) \ + .names[level] = "btrfs-" stem "-0" #level, + +#define DEFINE_NAME(stem) \ + DEFINE_LEVEL(stem, 0) \ + DEFINE_LEVEL(stem, 1) \ + DEFINE_LEVEL(stem, 2) \ + DEFINE_LEVEL(stem, 3) \ + DEFINE_LEVEL(stem, 4) \ + DEFINE_LEVEL(stem, 5) \ + DEFINE_LEVEL(stem, 6) \ + DEFINE_LEVEL(stem, 7) + +static struct btrfs_lockdep_keyset { + u64 id; /* root objectid */ + /* Longest entry: btrfs-free-space-00 */ + char names[BTRFS_MAX_LEVEL][20]; + struct lock_class_key keys[BTRFS_MAX_LEVEL]; +} btrfs_lockdep_keysets[] = { + { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") }, + { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") }, + { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") }, + { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") }, + { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") }, + { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") }, + { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") }, + { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") }, + { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") }, + { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") }, + { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") }, + { .id = 0, DEFINE_NAME("tree") }, +}; + +#undef DEFINE_LEVEL +#undef DEFINE_NAME + +void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level) +{ + struct btrfs_lockdep_keyset *ks; + + BUG_ON(level >= ARRAY_SIZE(ks->keys)); + + /* Find the matching keyset, id 0 is the default entry */ + for (ks = btrfs_lockdep_keysets; ks->id; ks++) + if (ks->id == objectid) + break; + + lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]); +} + +#endif + /* * Extent buffer locking * ===================== diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index a2e1f1f5c6e3..97370ec0cd29 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -130,4 +130,13 @@ void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock); void btrfs_drew_read_lock(struct btrfs_drew_lock *lock); void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC +void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level); +#else +static inline void btrfs_set_buffer_lockdep_class(u64 objectid, + struct extent_buffer *eb, int level) +{ +} +#endif + #endif
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit b40130b23ca4a08c5785d5a3559805916bddba3c ]
We have been hitting the following lockdep splat with btrfs/187 recently
WARNING: possible circular locking dependency detected 5.19.0-rc8+ #775 Not tainted ------------------------------------------------------ btrfs/752500 is trying to acquire lock: ffff97e1875a97b8 (btrfs-treloc-02#2){+.+.}-{3:3}, at: __btrfs_tree_lock+0x24/0x110
but task is already holding lock: ffff97e1875a9278 (btrfs-tree-01/1){+.+.}-{3:3}, at: __btrfs_tree_lock+0x24/0x110
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (btrfs-tree-01/1){+.+.}-{3:3}: down_write_nested+0x41/0x80 __btrfs_tree_lock+0x24/0x110 btrfs_init_new_buffer+0x7d/0x2c0 btrfs_alloc_tree_block+0x120/0x3b0 __btrfs_cow_block+0x136/0x600 btrfs_cow_block+0x10b/0x230 btrfs_search_slot+0x53b/0xb70 btrfs_lookup_inode+0x2a/0xa0 __btrfs_update_delayed_inode+0x5f/0x280 btrfs_async_run_delayed_root+0x24c/0x290 btrfs_work_helper+0xf2/0x3e0 process_one_work+0x271/0x590 worker_thread+0x52/0x3b0 kthread+0xf0/0x120 ret_from_fork+0x1f/0x30
-> #1 (btrfs-tree-01){++++}-{3:3}: down_write_nested+0x41/0x80 __btrfs_tree_lock+0x24/0x110 btrfs_search_slot+0x3c3/0xb70 do_relocation+0x10c/0x6b0 relocate_tree_blocks+0x317/0x6d0 relocate_block_group+0x1f1/0x560 btrfs_relocate_block_group+0x23e/0x400 btrfs_relocate_chunk+0x4c/0x140 btrfs_balance+0x755/0xe40 btrfs_ioctl+0x1ea2/0x2c90 __x64_sys_ioctl+0x88/0xc0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd
-> #0 (btrfs-treloc-02#2){+.+.}-{3:3}: __lock_acquire+0x1122/0x1e10 lock_acquire+0xc2/0x2d0 down_write_nested+0x41/0x80 __btrfs_tree_lock+0x24/0x110 btrfs_lock_root_node+0x31/0x50 btrfs_search_slot+0x1cb/0xb70 replace_path+0x541/0x9f0 merge_reloc_root+0x1d6/0x610 merge_reloc_roots+0xe2/0x260 relocate_block_group+0x2c8/0x560 btrfs_relocate_block_group+0x23e/0x400 btrfs_relocate_chunk+0x4c/0x140 btrfs_balance+0x755/0xe40 btrfs_ioctl+0x1ea2/0x2c90 __x64_sys_ioctl+0x88/0xc0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd
other info that might help us debug this:
Chain exists of: btrfs-treloc-02#2 --> btrfs-tree-01 --> btrfs-tree-01/1
Possible unsafe locking scenario:
CPU0 CPU1 ---- ---- lock(btrfs-tree-01/1); lock(btrfs-tree-01); lock(btrfs-tree-01/1); lock(btrfs-treloc-02#2);
*** DEADLOCK ***
7 locks held by btrfs/752500: #0: ffff97e292fdf460 (sb_writers#12){.+.+}-{0:0}, at: btrfs_ioctl+0x208/0x2c90 #1: ffff97e284c02050 (&fs_info->reclaim_bgs_lock){+.+.}-{3:3}, at: btrfs_balance+0x55f/0xe40 #2: ffff97e284c00878 (&fs_info->cleaner_mutex){+.+.}-{3:3}, at: btrfs_relocate_block_group+0x236/0x400 #3: ffff97e292fdf650 (sb_internal#2){.+.+}-{0:0}, at: merge_reloc_root+0xef/0x610 #4: ffff97e284c02378 (btrfs_trans_num_writers){++++}-{0:0}, at: join_transaction+0x1a8/0x5a0 #5: ffff97e284c023a0 (btrfs_trans_num_extwriters){++++}-{0:0}, at: join_transaction+0x1a8/0x5a0 #6: ffff97e1875a9278 (btrfs-tree-01/1){+.+.}-{3:3}, at: __btrfs_tree_lock+0x24/0x110
stack backtrace: CPU: 1 PID: 752500 Comm: btrfs Not tainted 5.19.0-rc8+ #775 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace:
dump_stack_lvl+0x56/0x73 check_noncircular+0xd6/0x100 ? lock_is_held_type+0xe2/0x140 __lock_acquire+0x1122/0x1e10 lock_acquire+0xc2/0x2d0 ? __btrfs_tree_lock+0x24/0x110 down_write_nested+0x41/0x80 ? __btrfs_tree_lock+0x24/0x110 __btrfs_tree_lock+0x24/0x110 btrfs_lock_root_node+0x31/0x50 btrfs_search_slot+0x1cb/0xb70 ? lock_release+0x137/0x2d0 ? _raw_spin_unlock+0x29/0x50 ? release_extent_buffer+0x128/0x180 replace_path+0x541/0x9f0 merge_reloc_root+0x1d6/0x610 merge_reloc_roots+0xe2/0x260 relocate_block_group+0x2c8/0x560 btrfs_relocate_block_group+0x23e/0x400 btrfs_relocate_chunk+0x4c/0x140 btrfs_balance+0x755/0xe40 btrfs_ioctl+0x1ea2/0x2c90 ? lock_is_held_type+0xe2/0x140 ? lock_is_held_type+0xe2/0x140 ? __x64_sys_ioctl+0x88/0xc0 __x64_sys_ioctl+0x88/0xc0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd
This isn't necessarily new, it's just tricky to hit in practice. There are two competing things going on here. With relocation we create a snapshot of every fs tree with a reloc tree. Any extent buffers that get initialized here are initialized with the reloc root lockdep key. However since it is a snapshot, any blocks that are currently in cache that originally belonged to the fs tree will have the normal tree lockdep key set. This creates the lock dependency of
reloc tree -> normal tree
for the extent buffer locking during the first phase of the relocation as we walk down the reloc root to relocate blocks.
However this is problematic because the final phase of the relocation is merging the reloc root into the original fs root. This involves searching down to any keys that exist in the original fs root and then swapping the relocated block and the original fs root block. We have to search down to the fs root first, and then go search the reloc root for the block we need to replace. This creates the dependency of
normal tree -> reloc tree
which is why lockdep complains.
Additionally even if we were to fix this particular mismatch with a different nesting for the merge case, we're still slotting in a block that has a owner of the reloc root objectid into a normal tree, so that block will have its lockdep key set to the tree reloc root, and create a lockdep splat later on when we wander into that block from the fs root.
Unfortunately the only solution here is to make sure we do not set the lockdep key to the reloc tree lockdep key normally, and then reset any blocks we wander into from the reloc root when we're doing the merged.
This solves the problem of having mixed tree reloc keys intermixed with normal tree keys, and then allows us to make sure in the merge case we maintain the lock order of
normal tree -> reloc tree
We handle this by setting a bit on the reloc root when we do the search for the block we want to relocate, and any block we search into or COW at that point gets set to the reloc tree key. This works correctly because we only ever COW down to the parent node, so we aren't resetting the key for the block we're linking into the fs root.
With this patch we no longer have the lockdep splat in btrfs/187.
Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/ctree.c | 3 +++ fs/btrfs/ctree.h | 2 ++ fs/btrfs/extent-tree.c | 18 +++++++++++++++++- fs/btrfs/extent_io.c | 11 ++++++++++- fs/btrfs/locking.c | 11 +++++++++++ fs/btrfs/locking.h | 5 +++++ fs/btrfs/relocation.c | 2 ++ 7 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 341ce90d24b1..fb7e331b6975 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1938,6 +1938,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (!p->skip_locking) { level = btrfs_header_level(b); + + btrfs_maybe_reset_lockdep_class(root, b); + if (level <= write_lock_level) { btrfs_tree_lock(b); p->locks[level] = BTRFS_WRITE_LOCK; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d1838de0b39c..ba402f60c87f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1105,6 +1105,8 @@ enum { BTRFS_ROOT_QGROUP_FLUSHING, /* This root has a drop operation that was started previously. */ BTRFS_ROOT_UNFINISHED_DROP, + /* This reloc root needs to have its buffers lockdep class reset. */ + BTRFS_ROOT_RESET_LOCKDEP_CLASS, };
static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 248ea15c9734..c71f6480d4d4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4781,6 +4781,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *buf; + u64 lockdep_owner = owner;
buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level); if (IS_ERR(buf)) @@ -4799,12 +4800,27 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, return ERR_PTR(-EUCLEAN); }
+ /* + * The reloc trees are just snapshots, so we need them to appear to be + * just like any other fs tree WRT lockdep. + * + * The exception however is in replace_path() in relocation, where we + * hold the lock on the original fs root and then search for the reloc + * root. At that point we need to make sure any reloc root buffers are + * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make + * lockdep happy. + */ + if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID && + !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state)) + lockdep_owner = BTRFS_FS_TREE_OBJECTID; + /* * This needs to stay, because we could allocate a freed block from an * old tree into a new tree, so we need to make sure this new block is * set to the appropriate level and owner. */ - btrfs_set_buffer_lockdep_class(owner, buf, level); + btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level); + __btrfs_tree_lock(buf, nest); btrfs_clean_tree_block(buf); clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a90546b3107c..0346a0d95729 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -6107,6 +6107,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *exists = NULL; struct page *p; struct address_space *mapping = fs_info->btree_inode->i_mapping; + u64 lockdep_owner = owner_root; int uptodate = 1; int ret;
@@ -6141,7 +6142,15 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) return ERR_PTR(-ENOMEM); - btrfs_set_buffer_lockdep_class(owner_root, eb, level); + + /* + * The reloc trees are just snapshots, so we need them to appear to be + * just like any other fs tree WRT lockdep. + */ + if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID) + lockdep_owner = BTRFS_FS_TREE_OBJECTID; + + btrfs_set_buffer_lockdep_class(lockdep_owner, eb, level);
num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++, index++) { diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 5747c63929df..9063072b399b 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -91,6 +91,13 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]); }
+void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb) +{ + if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state)) + btrfs_set_buffer_lockdep_class(root->root_key.objectid, + eb, btrfs_header_level(eb)); +} + #endif
/* @@ -244,6 +251,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
while (1) { eb = btrfs_root_node(root); + + btrfs_maybe_reset_lockdep_class(root, eb); btrfs_tree_lock(eb); if (eb == root->node) break; @@ -265,6 +274,8 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
while (1) { eb = btrfs_root_node(root); + + btrfs_maybe_reset_lockdep_class(root, eb); btrfs_tree_read_lock(eb); if (eb == root->node) break; diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 97370ec0cd29..26a2f962c268 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -132,11 +132,16 @@ void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock);
#ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level); +void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb); #else static inline void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level) { } +static inline void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, + struct extent_buffer *eb) +{ +} #endif
#endif diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 429a198f8937..689176c5f748 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1326,7 +1326,9 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc, btrfs_release_path(path);
path->lowest_level = level; + set_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state); ret = btrfs_search_slot(trans, src, &key, path, 0, 1); + clear_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &src->state); path->lowest_level = 0; if (ret) { if (ret > 0)
From: Josef Bacik josef@toxicpanda.com
[ Upstream commit 899b7f69f244e539ea5df1b4d756046337de44a5 ]
We're seeing a weird problem in production where we have overlapping extent items in the extent tree. It's unclear where these are coming from, and in debugging we realized there's no check in the tree checker for this sort of problem. Add a check to the tree-checker to make sure that the extents do not overlap each other.
Reviewed-by: Qu Wenruo wqu@suse.com Signed-off-by: Josef Bacik josef@toxicpanda.com Reviewed-by: David Sterba dsterba@suse.com Signed-off-by: David Sterba dsterba@suse.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/btrfs/tree-checker.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 51382d2be3d4..a84d2d489510 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1216,7 +1216,8 @@ static void extent_err(const struct extent_buffer *eb, int slot, }
static int check_extent_item(struct extent_buffer *leaf, - struct btrfs_key *key, int slot) + struct btrfs_key *key, int slot, + struct btrfs_key *prev_key) { struct btrfs_fs_info *fs_info = leaf->fs_info; struct btrfs_extent_item *ei; @@ -1436,6 +1437,26 @@ static int check_extent_item(struct extent_buffer *leaf, total_refs, inline_refs); return -EUCLEAN; } + + if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) || + (prev_key->type == BTRFS_METADATA_ITEM_KEY)) { + u64 prev_end = prev_key->objectid; + + if (prev_key->type == BTRFS_METADATA_ITEM_KEY) + prev_end += fs_info->nodesize; + else + prev_end += prev_key->offset; + + if (unlikely(prev_end > key->objectid)) { + extent_err(leaf, slot, + "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]", + prev_key->objectid, prev_key->type, + prev_key->offset, key->objectid, key->type, + key->offset); + return -EUCLEAN; + } + } + return 0; }
@@ -1604,7 +1625,7 @@ static int check_leaf_item(struct extent_buffer *leaf, break; case BTRFS_EXTENT_ITEM_KEY: case BTRFS_METADATA_ITEM_KEY: - ret = check_extent_item(leaf, key, slot); + ret = check_extent_item(leaf, key, slot, prev_key); break; case BTRFS_TREE_BLOCK_REF_KEY: case BTRFS_SHARED_DATA_REF_KEY:
From: Yang Jihong yangjihong1@huawei.com
[ Upstream commit c3b0f72e805f0801f05fa2aa52011c4bfc694c44 ]
ftrace_startup does not remove ops from ftrace_ops_list when ftrace_startup_enable fails:
register_ftrace_function ftrace_startup __register_ftrace_function ... add_ftrace_ops(&ftrace_ops_list, ops) ... ... ftrace_startup_enable // if ftrace failed to modify, ftrace_disabled is set to 1 ... return 0 // ops is in the ftrace_ops_list.
When ftrace_disabled = 1, unregister_ftrace_function simply returns without doing anything: unregister_ftrace_function ftrace_shutdown if (unlikely(ftrace_disabled)) return -ENODEV; // return here, __unregister_ftrace_function is not executed, // as a result, ops is still in the ftrace_ops_list __unregister_ftrace_function ...
If ops is dynamically allocated, it will be free later, in this case, is_ftrace_trampoline accesses NULL pointer:
is_ftrace_trampoline ftrace_ops_trampoline do_for_each_ftrace_op(op, ftrace_ops_list) // OOPS! op may be NULL!
Syzkaller reports as follows: [ 1203.506103] BUG: kernel NULL pointer dereference, address: 000000000000010b [ 1203.508039] #PF: supervisor read access in kernel mode [ 1203.508798] #PF: error_code(0x0000) - not-present page [ 1203.509558] PGD 800000011660b067 P4D 800000011660b067 PUD 130fb8067 PMD 0 [ 1203.510560] Oops: 0000 [#1] SMP KASAN PTI [ 1203.511189] CPU: 6 PID: 29532 Comm: syz-executor.2 Tainted: G B W 5.10.0 #8 [ 1203.512324] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [ 1203.513895] RIP: 0010:is_ftrace_trampoline+0x26/0xb0 [ 1203.514644] Code: ff eb d3 90 41 55 41 54 49 89 fc 55 53 e8 f2 00 fd ff 48 8b 1d 3b 35 5d 03 e8 e6 00 fd ff 48 8d bb 90 00 00 00 e8 2a 81 26 00 <48> 8b ab 90 00 00 00 48 85 ed 74 1d e8 c9 00 fd ff 48 8d bb 98 00 [ 1203.518838] RSP: 0018:ffffc900012cf960 EFLAGS: 00010246 [ 1203.520092] RAX: 0000000000000000 RBX: 000000000000007b RCX: ffffffff8a331866 [ 1203.521469] RDX: 0000000000000000 RSI: 0000000000000008 RDI: 000000000000010b [ 1203.522583] RBP: 0000000000000000 R08: 0000000000000000 R09: ffffffff8df18b07 [ 1203.523550] R10: fffffbfff1be3160 R11: 0000000000000001 R12: 0000000000478399 [ 1203.524596] R13: 0000000000000000 R14: ffff888145088000 R15: 0000000000000008 [ 1203.525634] FS: 00007f429f5f4700(0000) GS:ffff8881daf00000(0000) knlGS:0000000000000000 [ 1203.526801] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1203.527626] CR2: 000000000000010b CR3: 0000000170e1e001 CR4: 00000000003706e0 [ 1203.528611] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 1203.529605] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Therefore, when ftrace_startup_enable fails, we need to rollback registration process and remove ops from ftrace_ops_list.
Link: https://lkml.kernel.org/r/20220818032659.56209-1-yangjihong1@huawei.com
Suggested-by: Steven Rostedt rostedt@goodmis.org Signed-off-by: Yang Jihong yangjihong1@huawei.com Signed-off-by: Steven Rostedt (Google) rostedt@goodmis.org Signed-off-by: Sasha Levin sashal@kernel.org --- kernel/trace/ftrace.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e215a9c96971..e10cf1b54812 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2901,6 +2901,16 @@ int ftrace_startup(struct ftrace_ops *ops, int command)
ftrace_startup_enable(command);
+ /* + * If ftrace is in an undefined state, we just remove ops from list + * to prevent the NULL pointer, instead of totally rolling it back and + * free trampoline, because those actions could cause further damage. + */ + if (unlikely(ftrace_disabled)) { + __unregister_ftrace_function(ops); + return -ENODEV; + } + ops->flags &= ~FTRACE_OPS_FL_ADDING;
return 0;
linux-stable-mirror@lists.linaro.org