As noted by Michal, the blkg_iostat_set's in the lockless list hold
reference to blkg's to protect against their removal. Those blkg's
hold reference to blkcg. When a cgroup is being destroyed,
cgroup_rstat_flush() is only called at css_release_work_fn() which
is called when the blkcg reference count reaches 0. This circular
dependency will prevent blkcg and some blkgs from being freed after
they are made offline.
It is less a problem if the cgroup to be destroyed also has other
controllers like memory that will call cgroup_rstat_flush() which will
clean up the reference count. If block is the only controller that uses
rstat, these offline blkcg and blkgs may never be freed leaking more
and more memory over time.
To prevent this potential memory leak:
- flush blkcg per-cpu stats list in __blkg_release(), when no new stat
can be added
- add global blkg_stat_lock for covering concurrent parent blkg stat
update
- don't grab bio->bi_blkg reference when adding the stats into blkcg's
per-cpu stat list since all stats are guaranteed to be consumed before
releasing blkg instance, and grabbing blkg reference for stats was the
most fragile part of original patch
Based on Waiman's patch:
https://lore.kernel.org/linux-block/20221215033132.230023-3-longman@redhat.…
Fixes: 3b8cc6298724 ("blk-cgroup: Optimize blkcg_rstat_flush()")
Cc: stable(a)vger.kernel.org
Reported-by: Jay Shin <jaeshin(a)redhat.com>
Acked-by: Tejun Heo <tj(a)kernel.org>
Cc: Waiman Long <longman(a)redhat.com>
Cc: mkoutny(a)suse.com
Cc: Yosry Ahmed <yosryahmed(a)google.com>
Signed-off-by: Ming Lei <ming.lei(a)redhat.com>
Link: https://lore.kernel.org/r/20230609234249.1412858-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
---
Context difference with linus tree: 2c275afeb61d ("block: make blkcg_punt_bio_submit
optional") adds '#ifdef CONFIG_BLK_CGROUP_PUNT_BIO' in __blkg_release().
block/blk-cgroup.c | 40 +++++++++++++++++++++++++++++++---------
1 file changed, 31 insertions(+), 9 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 75bad5d60c9f..dd6d1c0117b1 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -35,6 +35,8 @@
#include "blk-throttle.h"
#include "blk-rq-qos.h"
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu);
+
/*
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
* blkcg_pol_register_mutex nests outside of it and synchronizes entire
@@ -58,6 +60,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
bool blkcg_debug_stats = false;
static struct workqueue_struct *blkcg_punt_bio_wq;
+static DEFINE_RAW_SPINLOCK(blkg_stat_lock);
+
#define BLKG_DESTROY_BATCH_SIZE 64
/*
@@ -165,8 +169,18 @@ static void blkg_free(struct blkcg_gq *blkg)
static void __blkg_release(struct rcu_head *rcu)
{
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+ struct blkcg *blkcg = blkg->blkcg;
+ int cpu;
WARN_ON(!bio_list_empty(&blkg->async_bios));
+ /*
+ * Flush all the non-empty percpu lockless lists before releasing
+ * us, given these stat belongs to us.
+ *
+ * blkg_stat_lock is for serializing blkg stat update
+ */
+ for_each_possible_cpu(cpu)
+ __blkcg_rstat_flush(blkcg, cpu);
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
@@ -888,23 +902,26 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
-static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
{
- struct blkcg *blkcg = css_to_blkcg(css);
struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
struct llist_node *lnode;
struct blkg_iostat_set *bisc, *next_bisc;
- /* Root-level stats are sourced from system-wide IO stats */
- if (!cgroup_parent(css->cgroup))
- return;
-
rcu_read_lock();
lnode = llist_del_all(lhead);
if (!lnode)
goto out;
+ /*
+ * For covering concurrent parent blkg update from blkg_release().
+ *
+ * When flushing from cgroup, cgroup_rstat_lock is always held, so
+ * this lock won't cause contention most of time.
+ */
+ raw_spin_lock(&blkg_stat_lock);
+
/*
* Iterate only the iostat_cpu's queued in the lockless list.
*/
@@ -928,13 +945,19 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
if (parent && parent->parent)
blkcg_iostat_update(parent, &blkg->iostat.cur,
&blkg->iostat.last);
- percpu_ref_put(&blkg->refcnt);
}
-
+ raw_spin_unlock(&blkg_stat_lock);
out:
rcu_read_unlock();
}
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+ /* Root-level stats are sourced from system-wide IO stats */
+ if (cgroup_parent(css->cgroup))
+ __blkcg_rstat_flush(css_to_blkcg(css), cpu);
+}
+
/*
* We source root cgroup stats from the system-wide stats to avoid
* tracking the same information twice and incurring overhead when no
@@ -2063,7 +2086,6 @@ void blk_cgroup_bio_start(struct bio *bio)
llist_add(&bis->lnode, lhead);
WRITE_ONCE(bis->lqueued, true);
- percpu_ref_get(&bis->blkg->refcnt);
}
u64_stats_update_end_irqrestore(&bis->sync, flags);
--
2.40.1
Hi,
Not sure why this was backported in the first place, but if so you'd
also need 1fb1abc83636 ("um: Fix build w/o CONFIG_PM_SLEEP").
I think b58294ce1a8a ("um: Allow PM with suspend-to-idle") should just
be reverted, but picking up the fix for it also works.
Robot keeps reporting to me that it's broken :)
Thanks,
johannes
Following regressions found on stable rc 5.4 while building MIPS configs,
Reported-by: Linux Kernel Functional Testing <lkft(a)linaro.org>
MIPS: Restore Au1300 support
[ Upstream commit f2041708dee30a3425f680265c337acd28293782 ]
Build log:
======
arch/mips/kernel/cpu-probe.c: In function 'cpu_probe':
arch/mips/kernel/cpu-probe.c:2125:9: error: duplicate case value
2125 | case PRID_COMP_NETLOGIC:
| ^~~~
arch/mips/kernel/cpu-probe.c:2099:9: note: previously used here
2099 | case PRID_COMP_NETLOGIC:
| ^~~~
make[3]: *** [scripts/Makefile.build:262: arch/mips/kernel/cpu-probe.o] Error 1
Links:
- https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-5.4.y/build/v5.4.2…
- https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-5.4.y/build/v5.4.2…
--
Linaro LKFT
https://lkft.linaro.org
Struct bd6107_platform_data refers to a platform device within
the Linux device hierarchy. The test in bd6107_backlight_check_fb()
compares it against the fbdev device in struct fb_info.dev, which
is different. Fix the test by comparing to struct fb_info.device.
Fixes a bug in the backlight driver and prepares fbdev for making
struct fb_info.dev optional.
v2:
* move renames into separate patch (Javier, Sam, Michael)
Fixes: 67b43e590415 ("backlight: Add ROHM BD6107 backlight driver")
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: Laurent Pinchart <laurent.pinchart+renesas(a)ideasonboard.com>
Cc: Lee Jones <lee(a)kernel.org>
Cc: Daniel Thompson <daniel.thompson(a)linaro.org>
Cc: Jingoo Han <jingoohan1(a)gmail.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: <stable(a)vger.kernel.org> # v3.12+
Reviewed-by: Javier Martinez Canillas <javierm(a)redhat.com>
Reviewed-by: Sam Ravnborg <sam(a)ravnborg.org>
Reviewed-by: Daniel Thompson <daniel.thompson(a)linaro.org>
---
drivers/video/backlight/bd6107.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/video/backlight/bd6107.c b/drivers/video/backlight/bd6107.c
index f4db6c064635b..e3410444ea235 100644
--- a/drivers/video/backlight/bd6107.c
+++ b/drivers/video/backlight/bd6107.c
@@ -104,7 +104,7 @@ static int bd6107_backlight_check_fb(struct backlight_device *backlight,
{
struct bd6107 *bd = bl_get_data(backlight);
- return bd->pdata->fbdev == NULL || bd->pdata->fbdev == info->dev;
+ return bd->pdata->fbdev == NULL || bd->pdata->fbdev == info->device;
}
static const struct backlight_ops bd6107_backlight_ops = {
--
2.41.0