July 2024 - Linux-stable-mirror

[PATCH net 1/2] ice: Add a per-VF limit on number of FDIR filters

by Tony Nguyen

From: Ahmed Zaki <ahmed.zaki(a)intel.com> While the iavf driver adds a s/w limit (128) on the number of FDIR filters that the VF can request, a malicious VF driver can request more than that and exhaust the resources for other VFs. Add a similar limit in ice. CC: stable(a)vger.kernel.org Fixes: 1f7ea1cd6a37 ("ice: Enable FDIR Configure for AVF") Reviewed-by: Przemek Kitszel <przemyslaw.kitszel(a)intel.com> Suggested-by: Sridhar Samudrala <sridhar.samudrala(a)intel.com> Signed-off-by: Ahmed Zaki <ahmed.zaki(a)intel.com> Reviewed-by: Wojciech Drewek <wojciech.drewek(a)intel.com> Tested-by: Rafal Romanowski <rafal.romanowski(a)intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com> --- .../net/ethernet/intel/ice/ice_ethtool_fdir.c | 2 +- drivers/net/ethernet/intel/ice/ice_fdir.h | 3 +++ .../net/ethernet/intel/ice/ice_virtchnl_fdir.c | 16 ++++++++++++++++ .../net/ethernet/intel/ice/ice_virtchnl_fdir.h | 1 + 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index e3cab8e98f52..5412eff8ef23 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -534,7 +534,7 @@ ice_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp, * * Returns the number of available flow director filters to this VSI */ -static int ice_fdir_num_avail_fltr(struct ice_hw *hw, struct ice_vsi *vsi) +int ice_fdir_num_avail_fltr(struct ice_hw *hw, struct ice_vsi *vsi) { u16 vsi_num = ice_get_hw_vsi_num(hw, vsi->idx); u16 num_guar; diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h index 021ecbac7848..ab5b118daa2d 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.h +++ b/drivers/net/ethernet/intel/ice/ice_fdir.h @@ -207,6 +207,8 @@ struct ice_fdir_base_pkt { const u8 *tun_pkt; }; +struct ice_vsi; + int ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id); int ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id); int ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr); @@ -218,6 +220,7 @@ int ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, u8 *pkt, bool frag, bool tun); int ice_get_fdir_cnt_all(struct ice_hw *hw); +int ice_fdir_num_avail_fltr(struct ice_hw *hw, struct ice_vsi *vsi); bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input); bool ice_fdir_has_frag(enum ice_fltr_ptype flow); struct ice_fdir_fltr * diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 8e4ff3af86c6..b4feb0927687 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -536,6 +536,8 @@ static void ice_vc_fdir_reset_cnt_all(struct ice_vf_fdir *fdir) fdir->fdir_fltr_cnt[flow][0] = 0; fdir->fdir_fltr_cnt[flow][1] = 0; } + + fdir->fdir_fltr_cnt_total = 0; } /** @@ -1560,6 +1562,7 @@ ice_vc_add_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx, resp->status = status; resp->flow_id = conf->flow_id; vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]++; + vf->fdir.fdir_fltr_cnt_total++; ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret, (u8 *)resp, len); @@ -1624,6 +1627,7 @@ ice_vc_del_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx, resp->status = status; ice_vc_fdir_remove_entry(vf, conf, conf->flow_id); vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]--; + vf->fdir.fdir_fltr_cnt_total--; ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret, (u8 *)resp, len); @@ -1790,6 +1794,7 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) struct virtchnl_fdir_add *stat = NULL; struct virtchnl_fdir_fltr_conf *conf; enum virtchnl_status_code v_ret; + struct ice_vsi *vf_vsi; struct device *dev; struct ice_pf *pf; int is_tun = 0; @@ -1798,6 +1803,17 @@ int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) pf = vf->pf; dev = ice_pf_to_dev(pf); + vf_vsi = ice_get_vf_vsi(vf); + +#define ICE_VF_MAX_FDIR_FILTERS 128 + if (!ice_fdir_num_avail_fltr(&pf->hw, vf_vsi) || + vf->fdir.fdir_fltr_cnt_total >= ICE_VF_MAX_FDIR_FILTERS) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(dev, "Max number of FDIR filters for VF %d is reached\n", + vf->vf_id); + goto err_exit; + } + ret = ice_vc_fdir_param_check(vf, fltr->vsi_id); if (ret) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h index c5bcc8d7481c..ac6dcab454b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h @@ -29,6 +29,7 @@ struct ice_vf_fdir_ctx { struct ice_vf_fdir { u16 fdir_fltr_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX]; int prof_entry_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX]; + u16 fdir_fltr_cnt_total; struct ice_fd_hw_prof **fdir_prof; struct idr fdir_rule_idr; -- 2.41.0

11 months, 3 weeks

1
0
0 0

Re: [PATCH 6.9 000/163] 6.9.11-rc1 review

by Ronald Warsow

Hi Greg no regressions here on x86_64 (RKL, Intel 11th Gen. CPU) Thanks Tested-by: Ronald Warsow <rwarsow(a)gmx.de>

11 months, 3 weeks

1
0
0 0

[PATCH] clk: samsung: fix getting Exynos4 fin_pll rate from external clocks

by Krzysztof Kozlowski

Commit 0dc83ad8bfc9 ("clk: samsung: Don't register clkdev lookup for the fixed rate clocks") claimed registering clkdev lookup is not necessary anymore, but that was not entirely true: Exynos4210/4212/4412 clock code still relied on it to get the clock rate of xxti or xusbxti external clocks. Drop that requirement by accessing already registered clk_hw when looking up the xxti/xusbxti rate. Reported-by: Artur Weber <aweber.kernel(a)gmail.com> Closes: https://lore.kernel.org/all/6227c1fb-d769-462a-b79b-abcc15d3db8e@gmail.com/ Fixes: 0dc83ad8bfc9 ("clk: samsung: Don't register clkdev lookup for the fixed rate clocks") Cc: <stable(a)vger.kernel.org> Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org> --- drivers/clk/samsung/clk-exynos4.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index a026ccca7315..28945b6b0ee1 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -1040,19 +1040,20 @@ static unsigned long __init exynos4_get_xom(void) static void __init exynos4_clk_register_finpll(struct samsung_clk_provider *ctx) { struct samsung_fixed_rate_clock fclk; - struct clk *clk; - unsigned long finpll_f = 24000000; + unsigned long finpll_f; + unsigned int parent; char *parent_name; unsigned int xom = exynos4_get_xom(); parent_name = xom & 1 ? "xusbxti" : "xxti"; - clk = clk_get(NULL, parent_name); - if (IS_ERR(clk)) { + parent = xom & 1 ? CLK_XUSBXTI : CLK_XXTI; + + finpll_f = clk_hw_get_rate(ctx->clk_data.hws[parent]); + if (!finpll_f) { pr_err("%s: failed to lookup parent clock %s, assuming " "fin_pll clock frequency is 24MHz\n", __func__, parent_name); - } else { - finpll_f = clk_get_rate(clk); + finpll_f = 24000000; } fclk.id = CLK_FIN_PLL; -- 2.43.0

11 months, 3 weeks

3
2
0 0

[PATCH v2 4.19 1/4] ext4: check and update i_disksize properly

by WangYuli

From: Zhang Yi <yi.zhang(a)huawei.com> commit 4df031ff5876d94b48dd9ee486ba5522382a06b2 upstream After commit 3da40c7b0898 ("ext4: only call ext4_truncate when size <= isize"), i_disksize could always be updated to i_size in ext4_setattr(), and we could sure that i_disksize <= i_size since holding inode lock and if i_disksize < i_size there are delalloc writes pending in the range upto i_size. If the end of the current write is <= i_size, there's no need to touch i_disksize since writeback will push i_disksize upto i_size eventually. So we can switch to check i_size instead of i_disksize in ext4_da_write_end() when write to the end of the file. we also could remove ext4_mark_inode_dirty() together because we defer inode dirtying to generic_write_end() or ext4_da_write_inline_data_end(). Cc: stable(a)vger.kernel.org Signed-off-by: Zhang Yi <yi.zhang(a)huawei.com> Reviewed-by: Jan Kara <jack(a)suse.cz> Signed-off-by: Theodore Ts'o <tytso(a)mit.edu> Link: https://lore.kernel.org/r/20210716122024.1105856-2-yi.zhang@huawei.com Reviewed-by: Cheng Nie <niecheng1(a)uniontech.com> Signed-off-by: Dandan Zhang <zhangdandan(a)uniontech.com> Signed-off-by: WangYuli <wangyuli(a)uniontech.com> --- fs/ext4/inode.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 646285fbc9fc..d8a8e4ee5ff8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3207,35 +3207,37 @@ static int ext4_da_write_end(struct file *file, end = start + copied - 1; /* - * generic_write_end() will run mark_inode_dirty() if i_size - * changes. So let's piggyback the i_disksize mark_inode_dirty - * into that. + * Since we are holding inode lock, we are sure i_disksize <= + * i_size. We also know that if i_disksize < i_size, there are + * delalloc writes pending in the range upto i_size. If the end of + * the current write is <= i_size, there's no need to touch + * i_disksize since writeback will push i_disksize upto i_size + * eventually. If the end of the current write is > i_size and + * inside an allocated block (ext4_da_should_update_i_disksize() + * check), we need to update i_disksize here as neither + * ext4_writepage() nor certain ext4_writepages() paths not + * allocating blocks update i_disksize. + * + * Note that we defer inode dirtying to generic_write_end() / + * ext4_da_write_inline_data_end(). */ new_i_size = pos + copied; - if (copied && new_i_size > EXT4_I(inode)->i_disksize) { + if (copied && new_i_size > inode->i_size) { if (ext4_has_inline_data(inode) || - ext4_da_should_update_i_disksize(page, end)) { + ext4_da_should_update_i_disksize(page, end)) ext4_update_i_disksize(inode, new_i_size); - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * bu greater than i_disksize.(hint delalloc) - */ - ext4_mark_inode_dirty(handle, inode); - } } if (write_mode != CONVERT_INLINE_DATA && ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && ext4_has_inline_data(inode)) - ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied, + ret = ext4_da_write_inline_data_end(inode, pos, len, copied, page); else - ret2 = generic_write_end(file, mapping, pos, len, copied, + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - copied = ret2; - if (ret2 < 0) - ret = ret2; + copied = ret; ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; -- 2.43.4

11 months, 3 weeks

2
1
0 0

[PATCH 6.1 1/3] netfs, fscache: export fscache_put_volume() and add fscache_try_get_volume()

by libaokun＠huaweicloud.com

From: Baokun Li <libaokun1(a)huawei.com> [ Upstream commit 85b08b31a22b481ec6528130daf94eee4452e23f ] Export fscache_put_volume() and add fscache_try_get_volume() helper function to allow cachefiles to get/put fscache_volume via linux/fscache-cache.h. Signed-off-by: Baokun Li <libaokun1(a)huawei.com> Link: https://lore.kernel.org/r/20240628062930.2467993-2-libaokun@huaweicloud.com Signed-off-by: Christian Brauner <brauner(a)kernel.org> Stable-dep-of: 522018a0de6b ("cachefiles: fix slab-use-after-free in fscache_withdraw_volume()") Stable-dep-of: 5d8f80578907 ("cachefiles: fix slab-use-after-free in cachefiles_withdraw_cookie()") Signed-off-by: Baokun Li <libaokun1(a)huawei.com> --- fs/fscache/internal.h | 2 -- fs/fscache/volume.c | 14 ++++++++++++++ include/linux/fscache-cache.h | 6 ++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index 1336f517e9b1..4799a722bc28 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h @@ -145,8 +145,6 @@ extern const struct seq_operations fscache_volumes_seq_ops; struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, enum fscache_volume_trace where); -void fscache_put_volume(struct fscache_volume *volume, - enum fscache_volume_trace where); bool fscache_begin_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, enum fscache_access_trace why); diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c index cdf991bdd9de..cb75c07b5281 100644 --- a/fs/fscache/volume.c +++ b/fs/fscache/volume.c @@ -27,6 +27,19 @@ struct fscache_volume *fscache_get_volume(struct fscache_volume *volume, return volume; } +struct fscache_volume *fscache_try_get_volume(struct fscache_volume *volume, + enum fscache_volume_trace where) +{ + int ref; + + if (!__refcount_inc_not_zero(&volume->ref, &ref)) + return NULL; + + trace_fscache_volume(volume->debug_id, ref + 1, where); + return volume; +} +EXPORT_SYMBOL(fscache_try_get_volume); + static void fscache_see_volume(struct fscache_volume *volume, enum fscache_volume_trace where) { @@ -420,6 +433,7 @@ void fscache_put_volume(struct fscache_volume *volume, fscache_free_volume(volume); } } +EXPORT_SYMBOL(fscache_put_volume); /* * Relinquish a volume representation cookie. diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index a174cedf4d90..35e86d2f2887 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -19,6 +19,7 @@ enum fscache_cache_trace; enum fscache_cookie_trace; enum fscache_access_trace; +enum fscache_volume_trace; enum fscache_cache_state { FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */ @@ -97,6 +98,11 @@ extern void fscache_withdraw_cookie(struct fscache_cookie *cookie); extern void fscache_io_error(struct fscache_cache *cache); +extern struct fscache_volume * +fscache_try_get_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); +extern void fscache_put_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); extern void fscache_end_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, enum fscache_access_trace why); -- 2.39.2

11 months, 3 weeks

2
3
0 0

[PATCH v2] usb: typec: ucsi: Fix NULL pointer dereference in ucsi_displayport_vdm()

by Ma Ke

When dp->con->partner is an error, a NULL pointer dereference may occur. Add a check for dp->con->partner to avoid dereferencing a NULL pointer. Cc: stable(a)vger.kernel.org Fixes: 372adf075a43 ("usb: typec: ucsi: Determine common SVDM Version") Signed-off-by: Ma Ke <make24(a)iscas.ac.cn> --- Changes in v2: - added Cc stable line; - fixed a typo. --- drivers/usb/typec/ucsi/displayport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 420af5139c70..ecc706e0800d 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -222,6 +222,8 @@ static int ucsi_displayport_vdm(struct typec_altmode *alt, switch (cmd_type) { case CMDT_INIT: if (PD_VDO_SVDM_VER(header) < svdm_version) { + if (IS_ERR_OR_NULL(dp->con->partner)) + break; typec_partner_set_svdm_version(dp->con->partner, PD_VDO_SVDM_VER(header)); svdm_version = PD_VDO_SVDM_VER(header); } -- 2.25.1

11 months, 3 weeks

3
2
0 0

[PATCH stable 4.19-6.6] filelock: Remove locks reliably when fcntl/close race is detected

by Jann Horn

commit 3cad1bc010416c6dd780643476bc59ed742436b9 upstream. When fcntl_setlk() races with close(), it removes the created lock with do_lock_file_wait(). However, LSMs can allow the first do_lock_file_wait() that created the lock while denying the second do_lock_file_wait() that tries to remove the lock. In theory (but AFAIK not in practice), posix_lock_file() could also fail to remove a lock due to GFP_KERNEL allocation failure (when splitting a range in the middle). After the bug has been triggered, use-after-free reads will occur in lock_get_status() when userspace reads /proc/locks. This can likely be used to read arbitrary kernel memory, but can't corrupt kernel memory. This only affects systems with SELinux / Smack / AppArmor / BPF-LSM in enforcing mode and only works from some security contexts. Fix it by calling locks_remove_posix() instead, which is designed to reliably get rid of POSIX locks associated with the given file and files_struct and is also used by filp_flush(). Fixes: c293621bbf67 ("[PATCH] stale POSIX lock handling") Cc: stable(a)kernel.org Link: https://bugs.chromium.org/p/project-zero/issues/detail?id=2563 Signed-off-by: Jann Horn <jannh(a)google.com> Link: https://lore.kernel.org/r/20240702-fs-lock-recover-2-v1-1-edd456f63789@goog… Reviewed-by: Jeff Layton <jlayton(a)kernel.org> Signed-off-by: Christian Brauner <brauner(a)kernel.org> [stable fixup: ->c.flc_type was ->fl_type in older kernels] Signed-off-by: Jann Horn <jannh(a)google.com> --- fs/locks.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index fb717dae9029..31659a2d9862 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2381,8 +2381,9 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, error = do_lock_file_wait(filp, cmd, file_lock); /* - * Attempt to detect a close/fcntl race and recover by releasing the - * lock that was just acquired. There is no need to do that when we're + * Detect close/fcntl races and recover by zapping all POSIX locks + * associated with this file and our files_struct, just like on + * filp_flush(). There is no need to do that when we're * unlocking though, or for OFD locks. */ if (!error && file_lock->fl_type != F_UNLCK && @@ -2397,9 +2398,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, f = files_lookup_fd_locked(files, fd); spin_unlock(&files->file_lock); if (f != filp) { - file_lock->fl_type = F_UNLCK; - error = do_lock_file_wait(filp, cmd, file_lock); - WARN_ON_ONCE(error); + locks_remove_posix(filp, files); error = -EBADF; } } base-commit: 2eaf5c0d81911ba05bace3a722cbcd708fdbbcba -- 2.45.2.1089.g2a221341d9-goog

11 months, 3 weeks

2
8
0 0

[PATCH] mm: Fix endless reclaim on machines with unaccepted memory.

by Kirill A. Shutemov

Unaccepted memory is considered unusable free memory, which is not counted as free on the zone watermark check. This causes get_page_from_freelist() to accept more memory to hit the high watermark, but it creates problems in the reclaim path. The reclaim path encounters a failed zone watermark check and attempts to reclaim memory. This is usually successful, but if there is little or no reclaimable memory, it can result in endless reclaim with little to no progress. This can occur early in the boot process, just after start of the init process when the only reclaimable memory is the page cache of the init executable and its libraries. To address this issue, teach shrink_node() and shrink_zones() to accept memory before attempting to reclaim. Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Reported-by: Jianxiong Gao <jxgao(a)google.com> Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory") Cc: stable(a)vger.kernel.org # v6.5+ --- mm/internal.h | 9 +++++++++ mm/page_alloc.c | 8 +------- mm/vmscan.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index cc2c5e07fad3..ea55cbad061f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1515,4 +1515,13 @@ static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry, void workingset_update_node(struct xa_node *node); extern struct list_lru shadow_nodes; +#ifdef CONFIG_UNACCEPTED_MEMORY +bool try_to_accept_memory(struct zone *zone, unsigned int order); +#else +static inline bool try_to_accept_memory(struct zone *zone, unsigned int order) +{ + return false; +} +#endif /* CONFIG_UNACCEPTED_MEMORY */ + #endif /* __MM_INTERNAL_H */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9ecf99190ea2..9a108c92245f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,6 @@ EXPORT_SYMBOL(nr_online_nodes); static bool page_contains_unaccepted(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); static inline bool has_unaccepted_memory(void); static bool __free_unaccepted(struct page *page); @@ -6940,7 +6939,7 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +bool try_to_accept_memory(struct zone *zone, unsigned int order) { long to_accept; int ret = false; @@ -6999,11 +6998,6 @@ static void accept_page(struct page *page, unsigned int order) { } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) -{ - return false; -} - static inline bool has_unaccepted_memory(void) { return false; diff --git a/mm/vmscan.c b/mm/vmscan.c index 2e34de9cd0d4..b2af1263b1bc 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5900,12 +5900,44 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) } while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL))); } +#ifdef CONFIG_UNACCEPTED_MEMORY +static bool node_try_to_accept_memory(pg_data_t *pgdat, struct scan_control *sc) +{ + bool progress = false; + struct zone *zone; + int z; + + for (z = 0; z <= sc->reclaim_idx; z++) { + zone = pgdat->node_zones + z; + if (!managed_zone(zone)) + continue; + + if (try_to_accept_memory(zone, sc->order)) + progress = true; + } + + return progress; +} +#else +static inline bool node_try_to_accept_memory(pg_data_t *pgdat, + struct scan_control *sc) +{ + return false; +} +#endif /* CONFIG_UNACCEPTED_MEMORY */ + static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) { unsigned long nr_reclaimed, nr_scanned, nr_node_reclaimed; struct lruvec *target_lruvec; bool reclaimable = false; + /* Try to accept memory before going for reclaim */ + if (node_try_to_accept_memory(pgdat, sc)) { + if (!should_continue_reclaim(pgdat, 0, sc)) + return; + } + if (lru_gen_enabled() && root_reclaim(sc)) { lru_gen_shrink_node(pgdat, sc); return; @@ -6118,6 +6150,10 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) GFP_KERNEL | __GFP_HARDWALL)) continue; + /* Try to accept memory before going for reclaim */ + if (try_to_accept_memory(zone, sc->order)) + continue; + /* * If we already have plenty of memory free for * compaction in this zone, don't free any more. -- 2.43.0

11 months, 3 weeks

4
9
0 0

[PATCH v5.10] ext4: fix error code saved on super block during file system abort

by Ajay Kaher

From: Gabriel Krisman Bertazi <krisman(a)collabora.com> [ Upstream commit 124e7c61deb27d758df5ec0521c36cf08d417f7a ] ext4_abort will eventually call ext4_errno_to_code, which translates the errno to an EXT4_ERR specific error. This means that ext4_abort expects an errno. By using EXT4_ERR_ here, it gets misinterpreted (as an errno), and ends up saving EXT4_ERR_EBUSY on the superblock during an abort, which makes no sense. ESHUTDOWN will get properly translated to EXT4_ERR_SHUTDOWN, so use that instead. Signed-off-by: Gabriel Krisman Bertazi <krisman(a)collabora.com> Link: https://lore.kernel.org/r/20211026173302.84000-1-krisman@collabora.com Signed-off-by: Theodore Ts'o <tytso(a)mit.edu> Signed-off-by: Ajay Kaher <ajay.kaher(a)broadcom.com> --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 160e5824948270..0e8406f5bf0aa0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5820,7 +5820,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) - ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user"); + ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); -- cgit 1.2.3-korg

11 months, 3 weeks

2
2
0 0

[PATCH 5.10] scsi: core: Fix a use-after-free

by Maximilian Heyne

From: Bart Van Assche <bvanassche(a)acm.org> [ Upstream commit 8fe4ce5836e932f5766317cb651c1ff2a4cd0506 ] There are two .exit_cmd_priv implementations. Both implementations use resources associated with the SCSI host. Make sure that these resources are still available when .exit_cmd_priv is called by waiting inside scsi_remove_host() until the tag set has been freed. This commit fixes the following use-after-free: ================================================================== BUG: KASAN: use-after-free in srp_exit_cmd_priv+0x27/0xd0 [ib_srp] Read of size 8 at addr ffff888100337000 by task multipathd/16727 Call Trace: <TASK> dump_stack_lvl+0x34/0x44 print_report.cold+0x5e/0x5db kasan_report+0xab/0x120 srp_exit_cmd_priv+0x27/0xd0 [ib_srp] scsi_mq_exit_request+0x4d/0x70 blk_mq_free_rqs+0x143/0x410 __blk_mq_free_map_and_rqs+0x6e/0x100 blk_mq_free_tag_set+0x2b/0x160 scsi_host_dev_release+0xf3/0x1a0 device_release+0x54/0xe0 kobject_put+0xa5/0x120 device_release+0x54/0xe0 kobject_put+0xa5/0x120 scsi_device_dev_release_usercontext+0x4c1/0x4e0 execute_in_process_context+0x23/0x90 device_release+0x54/0xe0 kobject_put+0xa5/0x120 scsi_disk_release+0x3f/0x50 device_release+0x54/0xe0 kobject_put+0xa5/0x120 disk_release+0x17f/0x1b0 device_release+0x54/0xe0 kobject_put+0xa5/0x120 dm_put_table_device+0xa3/0x160 [dm_mod] dm_put_device+0xd0/0x140 [dm_mod] free_priority_group+0xd8/0x110 [dm_multipath] free_multipath+0x94/0xe0 [dm_multipath] dm_table_destroy+0xa2/0x1e0 [dm_mod] __dm_destroy+0x196/0x350 [dm_mod] dev_remove+0x10c/0x160 [dm_mod] ctl_ioctl+0x2c2/0x590 [dm_mod] dm_ctl_ioctl+0x5/0x10 [dm_mod] __x64_sys_ioctl+0xb4/0xf0 dm_ctl_ioctl+0x5/0x10 [dm_mod] __x64_sys_ioctl+0xb4/0xf0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Link: https://lore.kernel.org/r/20220826002635.919423-1-bvanassche@acm.org Fixes: 65ca846a5314 ("scsi: core: Introduce {init,exit}_cmd_priv()") Cc: Ming Lei <ming.lei(a)redhat.com> Cc: Christoph Hellwig <hch(a)lst.de> Cc: Mike Christie <michael.christie(a)oracle.com> Cc: Hannes Reinecke <hare(a)suse.de> Cc: John Garry <john.garry(a)huawei.com> Cc: Li Zhijian <lizhijian(a)fujitsu.com> Reported-by: Li Zhijian <lizhijian(a)fujitsu.com> Tested-by: Li Zhijian <lizhijian(a)fujitsu.com> Signed-off-by: Bart Van Assche <bvanassche(a)acm.org> Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com> [mheyne: fixed contextual conflicts: - drivers/scsi/hosts.c: due to missing commit 973dac8a8a14 ("scsi: core: Refine how we set tag_set NUMA node") - drivers/scsi/scsi_sysfs.c: due to missing commit 6f8191fdf41d ("block: simplify disk shutdown") - drivers/scsi/scsi_scan.c: due to missing commit 59506abe5e34 ("scsi: core: Inline scsi_mq_alloc_queue()")] Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de> Cc: stable(a)vger.kernel.org # v5.10 --- drivers/scsi/hosts.c | 16 +++++++++++++--- drivers/scsi/scsi_lib.c | 6 +++++- drivers/scsi/scsi_priv.h | 2 +- drivers/scsi/scsi_scan.c | 1 + drivers/scsi/scsi_sysfs.c | 1 + include/scsi/scsi_host.h | 2 ++ 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 297f2b412d07..17fa1cd91da6 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -182,6 +182,15 @@ void scsi_remove_host(struct Scsi_Host *shost) scsi_proc_host_rm(shost); scsi_proc_hostdir_rm(shost->hostt); + /* + * New SCSI devices cannot be attached anymore because of the SCSI host + * state so drop the tag set refcnt. Wait until the tag set refcnt drops + * to zero because .exit_cmd_priv implementations may need the host + * pointer. + */ + kref_put(&shost->tagset_refcnt, scsi_mq_free_tags); + wait_for_completion(&shost->tagset_freed); + spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_DEL)) BUG_ON(scsi_host_set_state(shost, SHOST_DEL_RECOVERY)); @@ -240,6 +249,9 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, shost->dma_dev = dma_dev; + kref_init(&shost->tagset_refcnt); + init_completion(&shost->tagset_freed); + /* * Increase usage count temporarily here so that calling * scsi_autopm_put_host() will trigger runtime idle if there is @@ -312,6 +324,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, pm_runtime_disable(&shost->shost_gendev); pm_runtime_set_suspended(&shost->shost_gendev); pm_runtime_put_noidle(&shost->shost_gendev); + kref_put(&shost->tagset_refcnt, scsi_mq_free_tags); fail: return error; } @@ -344,9 +357,6 @@ static void scsi_host_dev_release(struct device *dev) kfree(dev_name(&shost->shost_dev)); } - if (shost->tag_set.tags) - scsi_mq_destroy_tags(shost); - kfree(shost->shost_data); ida_simple_remove(&host_index_ida, shost->host_no); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 14dec86ff749..64ae7bc2de60 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1923,9 +1923,13 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) return blk_mq_alloc_tag_set(tag_set); } -void scsi_mq_destroy_tags(struct Scsi_Host *shost) +void scsi_mq_free_tags(struct kref *kref) { + struct Scsi_Host *shost = container_of(kref, typeof(*shost), + tagset_refcnt); + blk_mq_free_tag_set(&shost->tag_set); + complete(&shost->tagset_freed); } /** diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 1183dbed687c..3fee9af05925 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -93,7 +93,7 @@ extern void scsi_requeue_run_queue(struct work_struct *work); extern struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev); extern void scsi_start_queue(struct scsi_device *sdev); extern int scsi_mq_setup_tags(struct Scsi_Host *shost); -extern void scsi_mq_destroy_tags(struct Scsi_Host *shost); +extern void scsi_mq_free_tags(struct kref *kref); extern void scsi_exit_queue(void); extern void scsi_evt_thread(struct work_struct *work); struct request_queue; diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 6f7c4d41c51d..e8703b043805 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -273,6 +273,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, kfree(sdev); goto out; } + kref_get(&sdev->host->tagset_refcnt); WARN_ON_ONCE(!blk_get_queue(sdev->request_queue)); sdev->request_queue->queuedata = sdev; diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 6cc4d0792e3d..bb37d698da1a 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1481,6 +1481,7 @@ void __scsi_remove_device(struct scsi_device *sdev) mutex_unlock(&sdev->state_mutex); blk_cleanup_queue(sdev->request_queue); + kref_put(&sdev->host->tagset_refcnt, scsi_mq_free_tags); cancel_work_sync(&sdev->requeue_work); if (sdev->host->hostt->slave_destroy) diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 4a9f1e6e3aac..3979e946c3fd 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -548,6 +548,8 @@ struct Scsi_Host { struct scsi_host_template *hostt; struct scsi_transport_template *transportt; + struct kref tagset_refcnt; + struct completion tagset_freed; /* Area to keep a shared tag map */ struct blk_mq_tag_set tag_set; -- 2.40.1 Amazon Web Services Development Center Germany GmbH Krausenstr. 38 10117 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597

11 months, 3 weeks

2
1
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror July 2024