The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 8cd44dd1d17a23d5cc8c443c659ca57aa76e2fa5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024080751-importer-postbox-eb90@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
8cd44dd1d17a ("btrfs: zoned: fix zone_unusable accounting on making block group read-write again")
9d4b0a129a0d ("btrfs: simplify arguments of btrfs_update_space_info and rename")
6a921de58992 ("btrfs: zoned: introduce space_info->active_total_bytes")
f6fca3917b4d ("btrfs: store chunk size in space-info struct")
b8bea09a456f ("btrfs: add trace event for submitted RAID56 bio")
c67c68eb57f1 ("btrfs: use integrated bitmaps for btrfs_raid_bio::dbitmap and finish_pbitmap")
143823cf4d5a ("btrfs: fix typos in comments")
385de0ef387d ("btrfs: use a normal workqueue for rmw_workers")
a7b8e39c922b ("btrfs: raid56: enable subpage support for RAID56")
3907ce293d68 ("btrfs: raid56: make alloc_rbio_essential_pages() subpage compatible")
ac26df8b3b02 ("btrfs: raid56: remove btrfs_raid_bio::bio_pages array")
07e4d3808047 ("btrfs: raid56: make __raid_recover_endio_io() subpage compatible")
46900662d02f ("btrfs: raid56: make finish_parity_scrub() subpage compatible")
3e77605d6a81 ("btrfs: raid56: make rbio_add_io_page() subpage compatible")
00425dd976d3 ("btrfs: raid56: introduce btrfs_raid_bio::bio_sectors")
eb3570607c8c ("btrfs: raid56: introduce btrfs_raid_bio::stripe_sectors")
94efbe19b9f1 ("btrfs: raid56: introduce new cached members for btrfs_raid_bio")
29b068382c6f ("btrfs: raid56: make btrfs_raid_bio more compact")
843de58b3e31 ("btrfs: raid56: open code rbio_nr_pages()")
cc353a8be2fd ("btrfs: reduce width for stripe_len from u64 to u32")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8cd44dd1d17a23d5cc8c443c659ca57aa76e2fa5 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota(a)wdc.com>
Date: Wed, 15 Feb 2023 09:18:02 +0900
Subject: [PATCH] btrfs: zoned: fix zone_unusable accounting on making block
group read-write again
When btrfs makes a block group read-only, it adds all free regions in the
block group to space_info->bytes_readonly. That free space excludes
reserved and pinned regions. OTOH, when btrfs makes the block group
read-write again, it moves all the unused regions into the block group's
zone_unusable. Those unused regions include reserved and pinned regions,
so too many bytes end up counted as zone_unusable.
Fortunately (or unfortunately), the erroneous zone_unusable does not
affect the calculation of space_info->bytes_readonly, because the free
space calculation (num_bytes in btrfs_dec_block_group_ro) is based on the
same erroneous zone_unusable value, which reduces num_bytes by just enough
to cancel the error.
This behavior can be easily discovered by adding a WARN_ON to check e.g.
"bg->pinned > 0" in btrfs_dec_block_group_ro(), and running an fstests
test case such as btrfs/282.
Fix it by properly considering pinned and reserved in
btrfs_dec_block_group_ro(). Also, add a WARN_ON and introduce
btrfs_space_info_update_bytes_zone_unusable() to catch a similar mistake.
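For reference, here is the corrected computation from
btrfs_dec_block_group_ro() in isolation (an annotated copy of the hunk
below, not new code):

	/*
	 * Pinned and reserved bytes were never added to bytes_readonly by
	 * inc_block_group_ro(), so they must not be folded into
	 * zone_unusable here either, or zone_unusable is overcounted.
	 */
	cache->zone_unusable =
		/* allocated space that no longer holds live data */
		(cache->alloc_offset - cache->used - cache->pinned -
		 cache->reserved) +
		/* plus the tail of the zone that can never be allocated */
		(cache->length - cache->zone_capacity);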
Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones")
CC: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Naohiro Aota <naohiro.aota(a)wdc.com>
Reviewed-by: Josef Bacik <josef(a)toxicpanda.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn(a)wdc.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 498442d0c216..2e49d978f504 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1223,8 +1223,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
block_group->space_info->total_bytes -= block_group->length;
block_group->space_info->bytes_readonly -=
(block_group->length - block_group->zone_unusable);
- block_group->space_info->bytes_zone_unusable -=
- block_group->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info,
+ -block_group->zone_unusable);
block_group->space_info->disk_total -= block_group->length * factor;
spin_unlock(&block_group->space_info->lock);
@@ -1396,7 +1396,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
if (btrfs_is_zoned(cache->fs_info)) {
/* Migrate zone_unusable bytes to readonly */
sinfo->bytes_readonly += cache->zone_unusable;
- sinfo->bytes_zone_unusable -= cache->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
+ -cache->zone_unusable);
cache->zone_unusable = 0;
}
cache->ro++;
@@ -3056,9 +3057,11 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
if (btrfs_is_zoned(cache->fs_info)) {
/* Migrate zone_unusable bytes back */
cache->zone_unusable =
- (cache->alloc_offset - cache->used) +
+ (cache->alloc_offset - cache->used - cache->pinned -
+ cache->reserved) +
(cache->length - cache->zone_capacity);
- sinfo->bytes_zone_unusable += cache->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
+ cache->zone_unusable);
sinfo->bytes_readonly -= cache->zone_unusable;
}
num_bytes = cache->length - cache->reserved -
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d77498e7671c..ff9f0d41987e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2793,7 +2793,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
readonly = true;
} else if (btrfs_is_zoned(fs_info)) {
/* Need reset before reusing in a zoned block group */
- space_info->bytes_zone_unusable += len;
+ btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info,
+ len);
readonly = true;
}
spin_unlock(&cache->lock);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3f9b7507543a..f5996a43db24 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2723,8 +2723,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
* If the block group is read-only, we should account freed space into
* bytes_readonly.
*/
- if (!block_group->ro)
+ if (!block_group->ro) {
block_group->zone_unusable += to_unusable;
+ WARN_ON(block_group->zone_unusable > block_group->length);
+ }
spin_unlock(&ctl->tree_lock);
if (!used) {
spin_lock(&block_group->lock);
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index c1d9d3664400..68e14fd48638 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -316,7 +316,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
found->bytes_used += block_group->used;
found->disk_used += block_group->used * factor;
found->bytes_readonly += block_group->bytes_super;
- found->bytes_zone_unusable += block_group->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable);
if (block_group->length > 0)
found->full = 0;
btrfs_try_granting_tickets(info, found);
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 4db8a0267c16..88b44221ce97 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -249,6 +249,7 @@ btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \
DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info");
DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
+DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable");
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index eeb56975bee7..de55a555d95b 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -2383,6 +2383,14 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned,
TP_ARGS(fs_info, sinfo, old, diff)
);
+DEFINE_EVENT(btrfs__space_info_update, update_bytes_zone_unusable,
+
+ TP_PROTO(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo, u64 old, s64 diff),
+
+ TP_ARGS(fs_info, sinfo, old, diff)
+);
+
DECLARE_EVENT_CLASS(btrfs_raid56_bio,
TP_PROTO(const struct btrfs_raid_bio *rbio,
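A note on the helper this patch introduces: the
DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable") line
generates btrfs_space_info_update_bytes_zone_unusable(). Simplified, the
generated function looks roughly like this (a sketch modeled on the
existing bytes_may_use/bytes_pinned helpers; the exact body may differ
between trees):

	static inline void
	btrfs_space_info_update_bytes_zone_unusable(struct btrfs_fs_info *fs_info,
						    struct btrfs_space_info *sinfo,
						    s64 bytes)
	{
		lockdep_assert_held(&sinfo->lock);
		/* emits the update_bytes_zone_unusable trace event added above */
		trace_update_bytes_zone_unusable(fs_info, sinfo,
						 sinfo->bytes_zone_unusable,
						 bytes);
		sinfo->bytes_zone_unusable += bytes;
	}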
On Fri, 12 Jul 2024 14:21:09 +0200, Greg Kroah-Hartman wrote:
> In the Linux kernel, the following vulnerability has been resolved:
>
> netfilter: ipset: Fix race between namespace cleanup and gc in the list:set type
>
> Lion Ackermann reported that there is a race condition between namespace cleanup
> in ipset and the garbage collection of the list:set type. The namespace
> cleanup can destroy the list:set type of sets while the gc of the set type is
> waiting to run in rcu cleanup. The latter uses data from the destroyed set,
> which leads to a use-after-free. The patch contains the following parts:
>
> - When destroying all sets, first remove the garbage collectors, then wait
> if needed and then destroy the sets.
> - Fix the badly ordered "wait then remove gc" for the destroy a single set
> case.
> - Fix the missing rcu locking in the list:set type in the userspace test
> case.
> - Use proper RCU list handlings in the list:set type.
>
> The patch depends on c1193d9bbbd3 (netfilter: ipset: Add list flush to cancel_gc).
This commit does not exist in stable kernels. Please backport it.
netfilter: ipset: Add list flush to cancel_gc
Flushing the list in cancel_gc drops references to other lists right away,
without waiting for RCU to destroy the list. This fixes a race where
referenced ipsets cannot be destroyed while the referring list is scheduled
for destruction. Since this commit is missing, the CVE fix potentially
introduced new races, as it makes use of RCU.
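For context, the ordering the fixes rely on can be sketched like this
(illustrative pseudo-kernel code, not the actual ipset functions; all
names here are made up):

	/*
	 * Destroy-all path: stop every garbage collector first, wait for
	 * already-queued RCU callbacks, and only then free the sets. If a
	 * set is freed while its gc callback is still pending, the callback
	 * dereferences freed memory -- the reported use-after-free.
	 */
	static void sets_destroy_all(struct list_head *sets)
	{
		struct my_set *set;

		list_for_each_entry(set, sets, list)
			cancel_set_gc(set);	/* also flushes list:set members */
		rcu_barrier();			/* wait for pending callbacks */
		list_for_each_entry(set, sets, list)
			destroy_set(set);
	}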
Thanks,
Siddh
The F2FS ioctls for starting and committing atomic writes check for
inode_owner_or_capable(), but this does not give LSMs like SELinux or
Landlock an opportunity to deny the write access - if the caller's FSUID
matches the inode's UID, inode_owner_or_capable() immediately returns true.
There are scenarios where LSMs want to deny a process the ability to write
particular files, even files whose owner matches the process's FSUID; but
this can currently be partially bypassed using the atomic write ioctls in
two ways (the first is sketched after the list):
- F2FS_IOC_START_ATOMIC_REPLACE + F2FS_IOC_COMMIT_ATOMIC_WRITE can
truncate an inode to size 0
- F2FS_IOC_START_ATOMIC_WRITE + F2FS_IOC_ABORT_ATOMIC_WRITE can revert
changes another process concurrently made to a file
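A minimal userspace sketch of the first bypass (the path is made up;
assumes kernel headers recent enough to define
F2FS_IOC_START_ATOMIC_REPLACE in <linux/f2fs.h>):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/f2fs.h>

	int main(void)
	{
		/* An LSM policy that denies write access does not stop a
		 * read-only open by the file's owner. */
		int fd = open("/mnt/f2fs/victim", O_RDONLY);

		if (fd < 0)
			return 1;
		/* Without the fix, only inode_owner_or_capable() is checked,
		 * so this truncates the file to size 0 through a read-only
		 * fd. With the fix, both ioctls fail with EBADF. */
		if (ioctl(fd, F2FS_IOC_START_ATOMIC_REPLACE) == 0)
			ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);
		close(fd);
		return 0;
	}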
Fix it by requiring FMODE_WRITE for these operations, just like for
F2FS_IOC_MOVE_RANGE. Since any legitimate caller should only be using these
ioctls when intending to write into the file, that seems unlikely to break
anything.
Fixes: 88b88a667971 ("f2fs: support atomic writes")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
---
fs/f2fs/file.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 168f08507004..a662392c5d8b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2117,12 +2117,15 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inode *pinode;
loff_t isize;
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -2225,12 +2228,15 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
static int f2fs_ioc_commit_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -2257,12 +2263,15 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
static int f2fs_ioc_abort_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
struct mnt_idmap *idmap = file_mnt_idmap(filp);
int ret;
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+
if (!inode_owner_or_capable(idmap, inode))
return -EACCES;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
---
base-commit: b446a2dae984fa5bd56dd7c3a02a426f87e05813
change-id: 20240806-f2fs-atomic-write-e019a47823de
--
Jann Horn <jannh(a)google.com>
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 5596d9e8b553dacb0ac34bcf873cbbfb16c3ba3e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024071559-reptilian-chaffing-a991@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
5596d9e8b553 ("mm/hugetlb: fix potential race in __update_and_free_hugetlb_folio()")
bd225530a4c7 ("mm/hugetlb_vmemmap: fix race with speculative PFN walkers")
51718e25c53f ("mm: convert arch_clear_hugepage_flags to take a folio")
831bc31a5e82 ("mm: hugetlb: improve the handling of hugetlb allocation failure for freed or in-use hugetlb")
ebc20dcac4ce ("mm: hugetlb_vmemmap: convert page to folio")
c5ad3233ead5 ("hugetlb_vmemmap: use folio argument for hugetlb_vmemmap_* functions")
c24f188b2289 ("hugetlb: batch TLB flushes when restoring vmemmap")
f13b83fdd996 ("hugetlb: batch TLB flushes when freeing vmemmap")
f4b7e3efaddb ("hugetlb: batch PMD split for bulk vmemmap dedup")
91f386bf0772 ("hugetlb: batch freeing of vmemmap pages")
cfb8c75099db ("hugetlb: perform vmemmap restoration on a list of pages")
79359d6d24df ("hugetlb: perform vmemmap optimization on a list of pages")
d67e32f26713 ("hugetlb: restructure pool allocations")
d2cf88c27f51 ("hugetlb: optimize update_and_free_pages_bulk to avoid lock cycles")
30a89adf872d ("hugetlb: check for hugetlb folio before vmemmap_restore")
d5b43e9683ec ("hugetlb: convert remove_pool_huge_page() to remove_pool_hugetlb_folio()")
04bbfd844b99 ("hugetlb: remove a few calls to page_folio()")
fde1c4ecf916 ("mm: hugetlb: skip initialization of gigantic tail struct pages if freed by HVO")
3ee0aa9f0675 ("mm: move some shrinker-related function declarations to mm/internal.h")
d8f5f7e445f0 ("hugetlb: set hugetlb page flag before optimizing vmemmap")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5596d9e8b553dacb0ac34bcf873cbbfb16c3ba3e Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe(a)huawei.com>
Date: Mon, 8 Jul 2024 10:51:27 +0800
Subject: [PATCH] mm/hugetlb: fix potential race in
__update_and_free_hugetlb_folio()
There is a potential race between __update_and_free_hugetlb_folio() and
try_memory_failure_hugetlb():
CPU1                                   CPU2
__update_and_free_hugetlb_folio        try_memory_failure_hugetlb
                                        folio_test_hugetlb
                                         -- It's still hugetlb folio.
folio_clear_hugetlb_hwpoison
                                        spin_lock_irq(&hugetlb_lock);
                                         __get_huge_page_for_hwpoison
                                          folio_set_hugetlb_hwpoison
                                        spin_unlock_irq(&hugetlb_lock);
spin_lock_irq(&hugetlb_lock);
__folio_clear_hugetlb(folio);
 -- Hugetlb flag is cleared but too late.
spin_unlock_irq(&hugetlb_lock);
When the above race occurs, the raw error page info will be leaked. Even
worse, the raw error pages won't have the hwpoisoned flag set and can hit
the pcplists/buddy allocator. Fix this issue by deferring
folio_clear_hugetlb_hwpoison() until __folio_clear_hugetlb() is done, so
that all raw error pages will have the hwpoisoned flag set.
Link: https://lkml.kernel.org/r/20240708025127.107713-1-linmiaohe@huawei.com
Fixes: 32c877191e02 ("hugetlb: do not clear hugetlb dtor until allocating vmemmap")
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
Acked-by: Muchun Song <muchun.song(a)linux.dev>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2afb70171b76..fe44324d6383 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1725,13 +1725,6 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
return;
}
- /*
- * Move PageHWPoison flag from head page to the raw error pages,
- * which makes any healthy subpages reusable.
- */
- if (unlikely(folio_test_hwpoison(folio)))
- folio_clear_hugetlb_hwpoison(folio);
-
/*
* If vmemmap pages were allocated above, then we need to clear the
* hugetlb flag under the hugetlb lock.
@@ -1742,6 +1735,13 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
spin_unlock_irq(&hugetlb_lock);
}
+ /*
+ * Move PageHWPoison flag from head page to the raw error pages,
+ * which makes any healthy subpages reusable.
+ */
+ if (unlikely(folio_test_hwpoison(folio)))
+ folio_clear_hugetlb_hwpoison(folio);
+
folio_ref_unfreeze(folio, 1);
/*
On Wed, Aug 07, 2024 at 06:00:11AM +0300, ahmed Ehab wrote:
> On Sat, Aug 3, 2024 at 3:51 AM Boqun Feng <boqun.feng(a)gmail.com> wrote:
>
> > On Mon, Jul 15, 2024 at 04:26:38PM +0300, botta633 wrote:
> > > From: Ahmed Ehab <bottaawesome633(a)gmail.com>
> > >
> > > Checking if the lockdep_map->name will change when setting the subclass.
> > > It shouldn't change so that the lock class and subclass will have the
> > same
> > > name
> > >
> > > Reported-by: <syzbot+7f4a6f7f7051474e40ad(a)syzkaller.appspotmail.com>
> > > Fixes: de8f5e4f2dc1f ("lockdep: Introduce wait-type checks")
> > > Cc: <stable(a)vger.kernel.org>
> >
> > You seems to miss my comment at v2:
> >
> > https://lore.kernel.org/lkml/ZpRKcHNZfsMuACRG@boqun-archlinux/
> >
> > , i.e. you don't need the Reported-by, Fixes and Cc tag for the patch
> > that adds a test case.
> >
> > > Signed-off-by: Ahmed Ehab <bottaawesome633(a)gmail.com>
> > > ---
> > > v3->v4:
> > > - Fixed subject line truncation.
> > >
> > > lib/locking-selftest.c | 21 +++++++++++++++++++++
> > > 1 file changed, 21 insertions(+)
> > >
> > > diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
> > > index 6f6a5fc85b42..aeed613799ca 100644
> > > --- a/lib/locking-selftest.c
> > > +++ b/lib/locking-selftest.c
> > > @@ -2710,6 +2710,25 @@ static void local_lock_3B(void)
> > >
> > > }
> > >
> > > + /**
> >
> > ^ there is a tailing space here, next time you can detect this by using
> > checkpatch. Also "/**" style is especially for function signature
> > comment, you could just use a "/*" here.
> >
> > > + * after setting the subclass the lockdep_map.name changes
> > > + * if we initialize a new string literal for the subclass
> > > + * we will have a new name pointer
> > > + */
> > > +static void class_subclass_X1_name_test(void)
> > > +{
> > > + printk("
> > --------------------------------------------------------------------------\n");
> > > + printk(" | class and subclass name test|\n");
> > > + printk(" ---------------------\n");
> > > + const char *name_before_setting_subclass = rwsem_X1.dep_map.name;
> > > + const char *name_after_setting_subclass;
> > > +
> > > + WARN_ON(!rwsem_X1.dep_map.name);
> > > + lockdep_set_subclass(&rwsem_X1, 1);
> > > + name_after_setting_subclass = rwsem_X1.dep_map.name;
> > > + WARN_ON(name_before_setting_subclass !=
> > name_after_setting_subclass);
> > > +}
> > > +
> > > static void local_lock_tests(void)
> > > {
> > > printk("
> > --------------------------------------------------------------------------\n");
> > > @@ -2916,6 +2935,8 @@ void locking_selftest(void)
> > >
> > > local_lock_tests();
> > >
> > > + class_subclass_X1_name_test();
> > > +
> >
> > I got this in the serial log:
> >
> > [ 0.619454]
> > --------------------------------------------------------------------------
> > [ 0.621463] | local_lock tests |
> > [ 0.622326] ---------------------
> > [ 0.623211] local_lock inversion 2: ok |
> > [ 0.624904] local_lock inversion 3A: ok |
> > [ 0.626740] local_lock inversion 3B: ok |
> > [ 0.628492]
> > --------------------------------------------------------------------------
> > [ 0.630513] | class and subclass name test|
> > [ 0.631614] ---------------------
> > [ 0.632502] hardirq_unsafe_softirq_safe: ok |
> >
> > two problems here:
> >
> > 1) The "class and subclass name test" line interrupts the output of
> > testsuite "local_lock tests".
> >
> > 2) Instead of a WARN_ON(), could you look into using dotest() to
> > print "ok" if the test passes, which is consistent with other
> >
> > tests.
> >
>
> I wrote it this way:
> static void lock_class_subclass_X1(void)
> {
> const char *name_before_setting_subclass = rwsem_X1.dep_map.name;
> const char *name_after_setting_subclass;
>
> lockdep_set_subclass(&rwsem_X1, 1);
> name_after_setting_subclass = rwsem_X1.dep_map.name;
> debug_locks = name_before_setting_subclass == name_after_setting_subclass;
I think you could use:
DEBUG_LOCKS_WARN_ON(name_before_setting_subclass != name_after_setting_subclass);
here.
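For instance (an untested sketch; dotest() keys its ok/FAILED report off
debug_locks, which DEBUG_LOCKS_WARN_ON() clears when the condition is
true):

static void lock_class_subclass_X1(void)
{
	const char *name_before = rwsem_X1.dep_map.name;

	lockdep_set_subclass(&rwsem_X1, 1);
	/* Setting a subclass must not replace the class name pointer. */
	DEBUG_LOCKS_WARN_ON(name_before != rwsem_X1.dep_map.name);
}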
Regards,
Boqun
> }
> ...
> static void class_subclass_X1_name_test(void)
> {
> printk("
> --------------------------------------------------------------------------\n");
> printk(" | class and subclass name test|\n");
> printk(" ---------------------\n");
>
> print_testname("lock class and subclass same name");
> dotest(lock_class_subclass_X1, SUCCESS, LOCKTYPE_RWSEM);
> pr_cont("\n");
> }
> However, assigning a value to debug_locks seems very uncommon. I tried to
> check other test cases; however, they seem to rely on the method they are
> testing. Do you have a suggestion for my scenario if I want to compare the
> names before and after setting the subclass?
> Or you suggest that I follow a different approach other than comparing the
> names such as checking debug_locks in lockdep_init_map_type and returning
> when we have multiple instantiations for lock->name?
>
> >
> > Could you please fix all above problems and send another version of this
> > patch (no need to resend the first one)? Thanks!
> >
> > Regards,
> > Boqun
> >
> > > print_testname("hardirq_unsafe_softirq_safe");
> > > dotest(hardirq_deadlock_softirq_not_deadlock, FAILURE,
> > LOCKTYPE_SPECIAL);
> > > pr_cont("\n");
> > > --
> > > 2.45.2
> > >
> >
>
> Regards,
> Ahmed
From: Michal Kubiak <michal.kubiak(a)intel.com>
The initialization of vport interrupt consists of two functions:
1) idpf_vport_intr_init() where a generic configuration is done
2) idpf_vport_intr_req_irq() where the irq for each q_vector is
requested.
The first function used to create a base name for each interrupt using a
kasprintf() call. Unfortunately, although that call allocated memory for
a text buffer, that memory was never released.
Fix this by no longer creating the interrupt base name in 1).
Instead, always create the full interrupt name in function 2); there is
no need to create a base name separately, since function 2) is never
called outside of the idpf_vport_intr_init() context.
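The leak, reduced to its essence (illustrative only, not the driver code):

	char *base;

	base = kasprintf(GFP_KERNEL, "%s-%s", drv, netdev); /* allocates */
	idpf_vport_intr_req_irq(vport, base);	/* only reads the buffer */
	/* missing kfree(base): the allocation is leaked on every
	 * interrupt init */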
Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport")
Cc: stable(a)vger.kernel.org # 6.7
Signed-off-by: Michal Kubiak <michal.kubiak(a)intel.com>
Reviewed-by: Pavan Kumar Linga <pavan.kumar.linga(a)intel.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin(a)intel.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Tested-by: Krishneil Singh <krishneil.k.singh(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
---
drivers/net/ethernet/intel/idpf/idpf_txrx.c | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index af2879f03b8d..a2f9f252694a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3780,13 +3780,15 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector)
/**
* idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport
* @vport: main vport structure
- * @basename: name for the vector
*/
-static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename)
+static int idpf_vport_intr_req_irq(struct idpf_vport *vport)
{
struct idpf_adapter *adapter = vport->adapter;
+ const char *drv_name, *if_name, *vec_name;
int vector, err, irq_num, vidx;
- const char *vec_name;
+
+ drv_name = dev_driver_string(&adapter->pdev->dev);
+ if_name = netdev_name(vport->netdev);
for (vector = 0; vector < vport->num_q_vectors; vector++) {
struct idpf_q_vector *q_vector = &vport->q_vectors[vector];
@@ -3804,8 +3806,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename)
else
continue;
- name = kasprintf(GFP_KERNEL, "%s-%s-%d", basename, vec_name,
- vidx);
+ name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name,
+ vec_name, vidx);
err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0,
name, q_vector);
@@ -4326,7 +4328,6 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport)
*/
int idpf_vport_intr_init(struct idpf_vport *vport)
{
- char *int_name;
int err;
err = idpf_vport_intr_init_vec_idx(vport);
@@ -4340,11 +4341,7 @@ int idpf_vport_intr_init(struct idpf_vport *vport)
if (err)
goto unroll_vectors_alloc;
- int_name = kasprintf(GFP_KERNEL, "%s-%s",
- dev_driver_string(&vport->adapter->pdev->dev),
- vport->netdev->name);
-
- err = idpf_vport_intr_req_irq(vport, int_name);
+ err = idpf_vport_intr_req_irq(vport);
if (err)
goto unroll_vectors_alloc;
--
2.42.0
btf__type_by_id() returns NULL when given an invalid type ID, so its
return value must be checked before being dereferenced. Add a NULL check
to fix the issue.
Cc: stable(a)vger.kernel.org
Fixes: 430025e5dca5 ("libbpf: Add subskeleton scaffolding")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
tools/lib/bpf/libbpf.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a3be6f8fac09..d1eb45d16054 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -13850,6 +13850,9 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
var = btf_var_secinfos(map_type);
for (i = 0; i < len; i++, var++) {
var_type = btf__type_by_id(btf, var->type);
+ if (!var_type)
+ return libbpf_err(-ENOENT);
+
var_name = btf__name_by_offset(btf, var_type->name_off);
if (strcmp(var_name, var_skel->name) == 0) {
*var_skel->addr = map->mmaped + var->offset;
--
2.25.1
The patch titled
Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Waiman Long <longman(a)redhat.com>
Subject: mm/memory-failure: use raw_spinlock_t in struct memory_failure_cpu
Date: Tue, 6 Aug 2024 12:41:07 -0400
The memory_failure_cpu structure is a per-cpu structure. Access to its
content requires the use of get_cpu_var() to lock in the current CPU and
disable preemption. The use of a regular spinlock_t for locking purposes
is fine for a non-RT kernel.
Since the integration of RT spinlock support into the v5.15 kernel, a
spinlock_t in an RT kernel becomes a sleeping lock, and taking a sleeping
lock in a preemption-disabled context is illegal, resulting in the
following kind of warning.
[12135.732244] BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48
[12135.732248] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 270076, name: kworker/0:0
[12135.732252] preempt_count: 1, expected: 0
[12135.732255] RCU nest depth: 2, expected: 2
:
[12135.732420] Hardware name: Dell Inc. PowerEdge R640/0HG0J8, BIOS 2.10.2 02/24/2021
[12135.732423] Workqueue: kacpi_notify acpi_os_execute_deferred
[12135.732433] Call Trace:
[12135.732436] <TASK>
[12135.732450] dump_stack_lvl+0x57/0x81
[12135.732461] __might_resched.cold+0xf4/0x12f
[12135.732479] rt_spin_lock+0x4c/0x100
[12135.732491] memory_failure_queue+0x40/0xe0
[12135.732503] ghes_do_memory_failure+0x53/0x390
[12135.732516] ghes_do_proc.constprop.0+0x229/0x3e0
[12135.732575] ghes_proc+0xf9/0x1a0
[12135.732591] ghes_notify_hed+0x6a/0x150
[12135.732602] notifier_call_chain+0x43/0xb0
[12135.732626] blocking_notifier_call_chain+0x43/0x60
[12135.732637] acpi_ev_notify_dispatch+0x47/0x70
[12135.732648] acpi_os_execute_deferred+0x13/0x20
[12135.732654] process_one_work+0x41f/0x500
[12135.732695] worker_thread+0x192/0x360
[12135.732715] kthread+0x111/0x140
[12135.732733] ret_from_fork+0x29/0x50
[12135.732779] </TASK>
Fix it by using a raw_spinlock_t for locking instead. Also move the
pr_err() out of the lock critical section to avoid indeterminate latency
of this call.
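The problematic pattern, in miniature (annotated sketch of the pre-fix
code path):

	mf_cpu = &get_cpu_var(memory_failure_cpu); /* disables preemption */
	spin_lock_irqsave(&mf_cpu->lock, flags);   /* spinlock_t sleeps on
						    * PREEMPT_RT -> bug */

raw_spin_lock_irqsave() never sleeps, so converting the lock to
raw_spinlock_t makes this sequence legal again on RT.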
Link: https://lkml.kernel.org/r/20240806164107.1044956-1-longman@redhat.com
Fixes: ea8f5fb8a71f ("HWPoison: add memory_failure_queue()")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: "Huang, Ying" <ying.huang(a)intel.com>
Cc: Juri Lelli <juri.lelli(a)redhat.com>
Cc: Len Brown <len.brown(a)intel.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
--- a/mm/memory-failure.c~mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu
+++ a/mm/memory-failure.c
@@ -2417,7 +2417,7 @@ struct memory_failure_entry {
struct memory_failure_cpu {
DECLARE_KFIFO(fifo, struct memory_failure_entry,
MEMORY_FAILURE_FIFO_SIZE);
- spinlock_t lock;
+ raw_spinlock_t lock;
struct work_struct work;
};
@@ -2443,19 +2443,21 @@ void memory_failure_queue(unsigned long
{
struct memory_failure_cpu *mf_cpu;
unsigned long proc_flags;
+ bool buffer_overflow;
struct memory_failure_entry entry = {
.pfn = pfn,
.flags = flags,
};
mf_cpu = &get_cpu_var(memory_failure_cpu);
- spin_lock_irqsave(&mf_cpu->lock, proc_flags);
- if (kfifo_put(&mf_cpu->fifo, entry))
+ raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+ buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry);
+ if (!buffer_overflow)
schedule_work_on(smp_processor_id(), &mf_cpu->work);
- else
+ raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+ if (buffer_overflow)
pr_err("buffer overflow when queuing memory failure at %#lx\n",
pfn);
- spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
put_cpu_var(memory_failure_cpu);
}
EXPORT_SYMBOL_GPL(memory_failure_queue);
@@ -2469,9 +2471,9 @@ static void memory_failure_work_func(str
mf_cpu = container_of(work, struct memory_failure_cpu, work);
for (;;) {
- spin_lock_irqsave(&mf_cpu->lock, proc_flags);
+ raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags);
gotten = kfifo_get(&mf_cpu->fifo, &entry);
- spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
+ raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
if (entry.flags & MF_SOFT_OFFLINE)
@@ -2501,7 +2503,7 @@ static int __init memory_failure_init(vo
for_each_possible_cpu(cpu) {
mf_cpu = &per_cpu(memory_failure_cpu, cpu);
- spin_lock_init(&mf_cpu->lock);
+ raw_spin_lock_init(&mf_cpu->lock);
INIT_KFIFO(mf_cpu->fifo);
INIT_WORK(&mf_cpu->work, memory_failure_work_func);
}
_
Patches currently in -mm which might be from longman(a)redhat.com are
padata-fix-possible-divide-by-0-panic-in-padata_mt_helper.patch
mm-memory-failure-use-raw_spinlock_t-in-struct-memory_failure_cpu.patch
watchdog-handle-the-enodev-failure-case-of-lockup_detector_delay_init-separately.patch