July 2024 - Linux-stable-mirror

[PATCH v3 2/2] f2fs: use meta inode for GC of COW file

by Sunmin Jeong

In case of the COW file, new updates and GC writes are already separated to page caches of the atomic file and COW file. As some cases that use the meta inode for GC, there are some race issues between a foreground thread and GC thread. To handle them, we need to take care when to invalidate and wait writeback of GC pages in COW files as the case of using the meta inode. Also, a pointer from the COW inode to the original inode is required to check the state of original pages. For the former, we can solve the problem by using the meta inode for GC of COW files. Then let's get a page from the original inode in move_data_block when GCing the COW file to avoid race condition. Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: stable(a)vger.kernel.org #v5.19+ Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com> Reviewed-by: Yeongjin Gil <youngjin.gil(a)samsung.com> Signed-off-by: Sunmin Jeong <s_min.jeong(a)samsung.com> Reviewed-by: Chao Yu <chao(a)kernel.org> --- v3: - make the mapping variable to select a proper inode v2: - use union for cow inode to point to atomic inode fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 13 +++++++++++-- fs/f2fs/file.c | 3 +++ fs/f2fs/gc.c | 7 +++++-- fs/f2fs/inline.c | 2 +- fs/f2fs/inode.c | 3 ++- 6 files changed, 23 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9a213d03005d..f6b1782f965a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2606,7 +2606,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (IS_NOQUOTA(inode)) return true; - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return true; /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */ if (f2fs_compressed_file(inode) && diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 796ae11c0fa3..4a8621e4a33a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -843,7 +843,11 @@ struct f2fs_inode_info { struct task_struct *atomic_write_task; /* 
store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; /* cached extent_tree entry */ - struct inode *cow_inode; /* copy-on-write inode for atomic write */ + union { + struct inode *cow_inode; /* copy-on-write inode for atomic write */ + struct inode *atomic_inode; + /* point to atomic_inode, available only for cow_inode */ + }; /* avoid racing between foreground op and gc */ struct f2fs_rwsem i_gc_rwsem[2]; @@ -4263,9 +4267,14 @@ static inline bool f2fs_post_read_required(struct inode *inode) f2fs_compressed_file(inode); } +static inline bool f2fs_used_in_atomic_write(struct inode *inode) +{ + return f2fs_is_atomic_file(inode) || f2fs_is_cow_file(inode); +} + static inline bool f2fs_meta_inode_gc_required(struct inode *inode) { - return f2fs_post_read_required(inode) || f2fs_is_atomic_file(inode); + return f2fs_post_read_required(inode) || f2fs_used_in_atomic_write(inode); } /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e4a7cff00796..547e7ec32b1f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2183,6 +2183,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); + + /* Set the COW inode's atomic_inode to the atomic inode */ + F2FS_I(fi->cow_inode)->atomic_inode = inode; } else { /* Reuse the already created COW inode */ ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index cb3006551ab5..724bbcb447d3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1171,7 +1171,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, static int ra_data_block(struct inode *inode, pgoff_t index) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct address_space *mapping = inode->i_mapping; + struct address_space *mapping = f2fs_is_cow_file(inode) ? 
+ F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct dnode_of_data dn; struct page *page; struct f2fs_io_info fio = { @@ -1260,6 +1261,8 @@ static int ra_data_block(struct inode *inode, pgoff_t index) static int move_data_block(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { + struct address_space *mapping = f2fs_is_cow_file(inode) ? + F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), .ino = inode->i_ino, @@ -1282,7 +1285,7 @@ static int move_data_block(struct inode *inode, block_t bidx, CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA; /* do not read out */ - page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); + page = f2fs_grab_cache_page(mapping, bidx, false); if (!page) return -ENOMEM; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1fba5728be70..cca7d448e55c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -16,7 +16,7 @@ static bool support_inline_data(struct inode *inode) { - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return false; if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 7a3e2458b2d9..18dea43e694b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -804,8 +804,9 @@ void f2fs_evict_inode(struct inode *inode) f2fs_abort_atomic_write(inode, true); - if (fi->cow_inode) { + if (fi->cow_inode && f2fs_is_cow_file(fi->cow_inode)) { clear_inode_flag(fi->cow_inode, FI_COW_FILE); + F2FS_I(fi->cow_inode)->atomic_inode = NULL; iput(fi->cow_inode); fi->cow_inode = NULL; } -- 2.25.1

1 year

1
0
0 0

[PATCH v2 0/5] ACPI: sysfs: manage sysfs attributes through device core

by Thomas Weißschuh

Simplify the lifecycle of the sysfs attributes by letting the device core manage them. Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net> --- Changes in v2: - Add fix to validate the buffer type returned by _STR (patch 1) - Drop usage of devm-APIs as these are unusable for unbound devices - Evaluate _STR on each sysfs access - Link to v1: https://lore.kernel.org/r/20240613-acpi-sysfs-groups-v1-0-665e0deb052a@weis… --- Thomas Weißschuh (5): ACPI: sysfs: validate return type of _STR method ACPI: sysfs: evaluate _STR on each sysfs access ACPI: sysfs: manage attributes as attribute_group ACPI: sysfs: manage sysfs attributes through device core ACPI: sysfs: remove return value of acpi_device_setup_files() drivers/acpi/device_sysfs.c | 196 +++++++++++++++++++------------------------- drivers/acpi/internal.h | 3 +- drivers/acpi/scan.c | 6 +- include/acpi/acpi_bus.h | 1 - 4 files changed, 89 insertions(+), 117 deletions(-) --- base-commit: 34afb82a3c67f869267a26f593b6f8fc6bf35905 change-id: 20240609-acpi-sysfs-groups-cfa756d16752 Best regards, -- Thomas Weißschuh <linux(a)weissschuh.net>

1 year

2
3
0 0

Re: [PATCH] net: ks8851: Fix potential TX stall after interface reopen

by Hariprasad Kelam

On 2024-07-10 at 01:28:45, Ronald Wahl (rwahl(a)gmx.de) wrote: > From: Ronald Wahl <ronald.wahl(a)raritan.com> > > The amount of TX space in the hardware buffer is tracked in the tx_space > variable. The initial value is currently only set during driver probing. > > After closing the interface and reopening it the tx_space variable has > the last value it had before close. If it is smaller than the size of > the first send packet after reopening the interface the queue will be > stopped. The queue is woken up after receiving a TX interrupt but this > will never happen since we did not send anything. > > This commit moves the initialization of the tx_space variable to the > ks8851_net_open function right before starting the TX queue. Also query > the value from the hardware instead of using a hard coded value. > > Only the SPI chip variant is affected by this issue because only this > driver variant actually depends on the tx_space variable in the xmit > function. > > Fixes: 3dc5d4454545 ("net: ks8851: Fix TX stall caused by TX buffer overrun") > Cc: "David S. 
Miller" <davem(a)davemloft.net> > Cc: Eric Dumazet <edumazet(a)google.com> > Cc: Jakub Kicinski <kuba(a)kernel.org> > Cc: Paolo Abeni <pabeni(a)redhat.com> > Cc: Simon Horman <horms(a)kernel.org> > Cc: netdev(a)vger.kernel.org > Cc: stable(a)vger.kernel.org # 5.10+ > Signed-off-by: Ronald Wahl <ronald.wahl(a)raritan.com> > --- > drivers/net/ethernet/micrel/ks8851_common.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c > index 6453c92f0fa7..03a554df6e7a 100644 > --- a/drivers/net/ethernet/micrel/ks8851_common.c > +++ b/drivers/net/ethernet/micrel/ks8851_common.c > @@ -482,6 +482,7 @@ static int ks8851_net_open(struct net_device *dev) > ks8851_wrreg16(ks, KS_IER, ks->rc_ier); > > ks->queued_len = 0; > + ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR); > netif_start_queue(ks->netdev); > > netif_dbg(ks, ifup, ks->netdev, "network device up\n"); > @@ -1101,7 +1102,6 @@ int ks8851_probe_common(struct net_device *netdev, struct device *dev, > int ret; > > ks->netdev = netdev; > - ks->tx_space = 6144; > > ks->gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); > ret = PTR_ERR_OR_ZERO(ks->gpio); > -- > 2.45.2 > > Reviewed-by: Hariprasad Kelam <hkelam(a)marvell.com>

1 year

1
0
0 0

[PATCH] x86/tdx: Support vmalloc() for tdx_enc_status_changed()

by Dexuan Cui

When a TDX guest runs on Hyper-V, the hv_netvsc driver's netvsc_init_buf() allocates buffers using vzalloc(), and needs to share the buffers with the host OS by calling set_memory_decrypted(), which is not working for vmalloc() yet. Add the support by handling the pages one by one. Co-developed-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com> Signed-off-by: Dexuan Cui <decui(a)microsoft.com> Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com> Reviewed-by: Michael Kelley <mikelley(a)microsoft.com> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com> Reviewed-by: Rick Edgecombe <rick.p.edgecombe(a)intel.com> Reviewed-by: Dave Hansen <dave.hansen(a)linux.intel.com> Acked-by: Kai Huang <kai.huang(a)intel.com> Cc: stable(a)vger.kernel.org --- Hi Boris, Kirill and all, This patch was posted on May 20, 2024: Link: https://lore.kernel.org/all/20240521021238.1803-1-decui%40microsoft.com The patch caused an issue to Kirill's kexec TDX patchset, so Kirill fixed it: Link: https://lore.kernel.org/all/uewczuxr5foiwe6wklhcgzi6ejfwgacxxoa67xadey62s46… Kirill agreed that I should repost the patch with his fix combined, hence I'm posting this new version, which is based on tip's master today (at the moment, it's commit aa9d8caba6e4 ("Merge timers/core into tip/master")). I suppose the patch would go in the branch tip/master or x86/tdx. 
Thanks, Dexuan arch/x86/coco/tdx/tdx.c | 43 ++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 078e2bac25531..8f471260924f7 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -8,6 +8,7 @@ #include <linux/export.h> #include <linux/io.h> #include <linux/kexec.h> +#include <linux/mm.h> #include <asm/coco.h> #include <asm/tdx.h> #include <asm/vmx.h> @@ -782,6 +783,19 @@ static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc) return false; } +static bool tdx_enc_status_changed_phys(phys_addr_t start, phys_addr_t end, + bool enc) +{ + if (!tdx_map_gpa(start, end, enc)) + return false; + + /* shared->private conversion requires memory to be accepted before use */ + if (enc) + return tdx_accept_memory(start, end); + + return true; +} + /* * Inform the VMM of the guest's intent for this physical page: shared with * the VMM or private to the guest. The VMM is expected to change its mapping @@ -789,15 +803,30 @@ static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc) */ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) { - phys_addr_t start = __pa(vaddr); - phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE); + unsigned long start = vaddr; + unsigned long end = start + numpages * PAGE_SIZE; + unsigned long step = end - start; + unsigned long addr; + + /* Step through page-by-page for vmalloc() mappings */ + if (is_vmalloc_addr((void *)vaddr)) + step = PAGE_SIZE; + + for (addr = start; addr < end; addr += step) { + phys_addr_t start_pa; + phys_addr_t end_pa; + + /* The check fails on vmalloc() mappings */ + if (virt_addr_valid(addr)) + start_pa = __pa(addr); + else + start_pa = slow_virt_to_phys((void *)addr); - if (!tdx_map_gpa(start, end, enc)) - return false; + end_pa = start_pa + step; - /* shared->private conversion requires memory to be accepted before use */ - if (enc) - return 
tdx_accept_memory(start, end); + if (!tdx_enc_status_changed_phys(start_pa, end_pa, enc)) + return false; + } return true; } -- 2.25.1

1 year

3
10
0 0

[PATCH v2 2/2] f2fs: use meta inode for GC of COW file

by Sunmin Jeong

In case of the COW file, new updates and GC writes are already separated to page caches of the atomic file and COW file. As some cases that use the meta inode for GC, there are some race issues between a foreground thread and GC thread. To handle them, we need to take care when to invalidate and wait writeback of GC pages in COW files as the case of using the meta inode. Also, a pointer from the COW inode to the original inode is required to check the state of original pages. For the former, we can solve the problem by using the meta inode for GC of COW files. Then let's get a page from the original inode in move_data_block when GCing the COW file to avoid race condition. Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: stable(a)vger.kernel.org #v5.19+ Reviewed-by: Sungjong Seo <sj1557.seo(a)samsung.com> Reviewed-by: Yeongjin Gil <youngjin.gil(a)samsung.com> Signed-off-by: Sunmin Jeong <s_min.jeong(a)samsung.com> --- v2: - use union for cow inode to point to atomic inode fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 13 +++++++++++-- fs/f2fs/file.c | 3 +++ fs/f2fs/gc.c | 12 ++++++++++-- fs/f2fs/inline.c | 2 +- fs/f2fs/inode.c | 3 ++- 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9a213d03005d..f6b1782f965a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2606,7 +2606,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (IS_NOQUOTA(inode)) return true; - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return true; /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */ if (f2fs_compressed_file(inode) && diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 796ae11c0fa3..4a8621e4a33a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -843,7 +843,11 @@ struct f2fs_inode_info { struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; /* cached 
extent_tree entry */ - struct inode *cow_inode; /* copy-on-write inode for atomic write */ + union { + struct inode *cow_inode; /* copy-on-write inode for atomic write */ + struct inode *atomic_inode; + /* point to atomic_inode, available only for cow_inode */ + }; /* avoid racing between foreground op and gc */ struct f2fs_rwsem i_gc_rwsem[2]; @@ -4263,9 +4267,14 @@ static inline bool f2fs_post_read_required(struct inode *inode) f2fs_compressed_file(inode); } +static inline bool f2fs_used_in_atomic_write(struct inode *inode) +{ + return f2fs_is_atomic_file(inode) || f2fs_is_cow_file(inode); +} + static inline bool f2fs_meta_inode_gc_required(struct inode *inode) { - return f2fs_post_read_required(inode) || f2fs_is_atomic_file(inode); + return f2fs_post_read_required(inode) || f2fs_used_in_atomic_write(inode); } /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e4a7cff00796..547e7ec32b1f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2183,6 +2183,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); + + /* Set the COW inode's atomic_inode to the atomic inode */ + F2FS_I(fi->cow_inode)->atomic_inode = inode; } else { /* Reuse the already created COW inode */ ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index cb3006551ab5..61913fcefd9e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1186,7 +1186,11 @@ static int ra_data_block(struct inode *inode, pgoff_t index) }; int err; - page = f2fs_grab_cache_page(mapping, index, true); + if (f2fs_is_cow_file(inode)) + page = f2fs_grab_cache_page(F2FS_I(inode)->atomic_inode->i_mapping, + index, true); + else + page = f2fs_grab_cache_page(mapping, index, true); if (!page) return -ENOMEM; @@ -1282,7 +1286,11 @@ static int move_data_block(struct inode *inode, block_t bidx, CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA; /* do not read out */ - page = 
f2fs_grab_cache_page(inode->i_mapping, bidx, false); + if (f2fs_is_cow_file(inode)) + page = f2fs_grab_cache_page(F2FS_I(inode)->atomic_inode->i_mapping, + bidx, false); + else + page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); if (!page) return -ENOMEM; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1fba5728be70..cca7d448e55c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -16,7 +16,7 @@ static bool support_inline_data(struct inode *inode) { - if (f2fs_is_atomic_file(inode)) + if (f2fs_used_in_atomic_write(inode)) return false; if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode)) return false; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 7a3e2458b2d9..18dea43e694b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -804,8 +804,9 @@ void f2fs_evict_inode(struct inode *inode) f2fs_abort_atomic_write(inode, true); - if (fi->cow_inode) { + if (fi->cow_inode && f2fs_is_cow_file(fi->cow_inode)) { clear_inode_flag(fi->cow_inode, FI_COW_FILE); + F2FS_I(fi->cow_inode)->atomic_inode = NULL; iput(fi->cow_inode); fi->cow_inode = NULL; } -- 2.25.1

1 year

3
3
0 0

[PATCH v4] drm/gma500: fix null pointer dereference in cdv_intel_lvds_get_modes

by Ma Ke

In cdv_intel_lvds_get_modes(), the return value of drm_mode_duplicate() is assigned to mode and then used without a NULL check, which will lead to a NULL pointer dereference on failure of drm_mode_duplicate(). Add a check to avoid the NULL pointer dereference. Cc: stable(a)vger.kernel.org Fixes: 6a227d5fd6c4 ("gma500: Add support for Cedarview") Signed-off-by: Ma Ke <make24(a)iscas.ac.cn> --- Changes in v4: - revised the recipient email list; apologies for the inadvertent mistake. Changes in v3: - added the recipient's email address, due to the prolonged absence of a response from the recipients. Changes in v2: - modified the patch according to suggestions from other patches; - added Fixes line; - added Cc stable; - Link: https://lore.kernel.org/lkml/20240622072514.1867582-1-make24@iscas.ac.cn/T/ --- drivers/gpu/drm/gma500/cdv_intel_lvds.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c index f08a6803dc18..3adc2c9ab72d 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c +++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c @@ -311,6 +311,9 @@ static int cdv_intel_lvds_get_modes(struct drm_connector *connector) if (mode_dev->panel_fixed_mode != NULL) { struct drm_display_mode *mode = drm_mode_duplicate(dev, mode_dev->panel_fixed_mode); + if (!mode) + return 0; + drm_mode_probed_add(connector, mode); return 1; } -- 2.25.1

1 year

3
3
0 0

[PATCH net v4 3/4] ipv6: take care of scope when choosing the src addr

by Nicolas Dichtel

When the source address is selected, the scope must be checked. For example, if a loopback address is assigned to the vrf device, it must not be chosen for packets sent outside. CC: stable(a)vger.kernel.org Fixes: afbac6010aec ("net: ipv6: Address selection needs to consider L3 domains") Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Reviewed-by: David Ahern <dsahern(a)kernel.org> --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5c424a0e7232..4f2c5cc31015 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1873,7 +1873,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, master, &dst, scores, hiscore_idx); - if (scores[hiscore_idx].ifa) + if (scores[hiscore_idx].ifa && + scores[hiscore_idx].scopedist >= 0) goto out; } -- 2.43.1

1 year

1
0
0 0

[PATCH net v4 2/4] ipv6: fix source address selection with route leak

by Nicolas Dichtel

By default, an address assigned to the output interface is selected when the source address is not specified. This is problematic when a route, configured in a vrf, uses an interface from another vrf (aka route leak). The original vrf does not own the selected source address. Let's add a check against the output interface and call the appropriate function to select the source address. CC: stable(a)vger.kernel.org Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> --- include/net/ip6_route.h | 22 +++++++++++++++------- net/ipv6/ip6_output.c | 1 + net/ipv6/route.c | 2 +- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a18ed24fed94..6dbdf60b342f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,18 +127,26 @@ void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, - unsigned int prefs, + unsigned int prefs, int l3mdev_index, struct in6_addr *saddr) { + struct net_device *l3mdev; + struct net_device *dev; + bool same_vrf; int err = 0; - if (f6i && f6i->fib6_prefsrc.plen) { - *saddr = f6i->fib6_prefsrc.addr; - } else { - struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + rcu_read_lock(); - err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); - } + l3mdev = dev_get_by_index_rcu(net, l3mdev_index); + if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) + dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; + if (f6i && f6i->fib6_prefsrc.plen && same_vrf) + *saddr = f6i->fib6_prefsrc.addr; + else + err = ipv6_dev_get_saddr(net, same_vrf ? 
dev : l3mdev, daddr, prefs, saddr); + + rcu_read_unlock(); return err; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 27d8725445e3..784424ac4147 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1124,6 +1124,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, + fl6->flowi6_l3mdev, &fl6->saddr); rcu_read_unlock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d72ca0b086d..c9a9506b714d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5689,7 +5689,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } -- 2.43.1

1 year

1
0
0 0

[PATCH net v4 1/4] ipv4: fix source address selection with route leak

by Nicolas Dichtel

By default, an address assigned to the output interface is selected when the source address is not specified. This is problematic when a route, configured in a vrf, uses an interface from another vrf (aka route leak). The original vrf does not own the selected source address. Let's add a check against the output interface and call the appropriate function to select the source address. CC: stable(a)vger.kernel.org Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF") Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Reviewed-by: David Ahern <dsahern(a)kernel.org> --- net/ipv4/fib_semantics.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f669da98d11d..8956026bc0a2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2270,6 +2270,15 @@ void fib_select_path(struct net *net, struct fib_result *res, fib_select_default(fl4, res); check_saddr: - if (!fl4->saddr) - fl4->saddr = fib_result_prefsrc(net, res); + if (!fl4->saddr) { + struct net_device *l3mdev; + + l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev); + + if (!l3mdev || + l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev) + fl4->saddr = fib_result_prefsrc(net, res); + else + fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK); + } } -- 2.43.1

1 year

1
0
0 0

[PATCH] mm: Fix PTE_AF handling in fault path on architectures with HW AF support

by Ram Tummala

Commit 3bd786f76de2 ("mm: convert do_set_pte() to set_pte_range()") replaced do_set_pte() with set_pte_range() and that introduced a regression in the following faulting path of non-anonymous vmas on CPUs with HW AF support. handle_pte_fault() do_pte_missing() do_fault() do_read_fault() || do_cow_fault() || do_shared_fault() finish_fault() set_pte_range() The polarity of prefault calculation is incorrect. This leads to prefault being incorrectly set for the faulting address. The following if check will incorrectly clear the PTE_AF bit instead of setting it and the access will fault again on the same address due to the missing PTE_AF bit. if (prefault && arch_wants_old_prefaulted_pte()) entry = pte_mkold(entry); On a subsequent fault on the same address, the faulting path will see a non NULL vmf->pte and instead of reaching the do_pte_missing() path, PTE_AF will be correctly set in handle_pte_fault() itself. Due to this bug, performance degradation in the fault handling path will be observed due to unnecessary double faulting. Cc: stable(a)vger.kernel.org Fixes: 3bd786f76de2 ("mm: convert do_set_pte() to set_pte_range()") Signed-off-by: Ram Tummala <rtummala(a)nvidia.com> --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 0a769f34bbb2..03263034a040 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4781,7 +4781,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio, { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; - bool prefault = in_range(vmf->address, addr, nr * PAGE_SIZE); + bool prefault = !in_range(vmf->address, addr, nr * PAGE_SIZE); pte_t entry; flush_icache_pages(vma, page, nr); -- 2.34.1

1 year

5
4
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror July 2024