The patch below does not apply to the 4.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id, to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2395928158059b8f9858365fce7713ce7fef62e4 Mon Sep 17 00:00:00 2001
From: Rokudo Yan <wu-yan@tcl.com>
Date: Thu, 25 Feb 2021 17:18:31 -0800
Subject: [PATCH] zsmalloc: account the number of compacted pages correctly
There exist multiple paths that may run zram compaction concurrently:
1. auto-compaction triggered during memory reclaim
2. userspace utils writing the zram<id>/compaction node

So, multiple threads may call zs_shrinker_scan/zs_compact concurrently. But pages_compacted is a per-pool zsmalloc variable, and modification of it is not serialized (it is only protected by the per-size-class class->lock). There are two issues here:
1. pages_compacted may not equal the total number of pages freed (due to concurrent additions).
2. zs_shrinker_scan may not return the correct number of pages freed by the current shrinker invocation.

The fix is simple:
1. account the number of pages freed in zs_compact() locally.
2. use the atomic variable pages_compacted to accumulate the total.
Link: https://lkml.kernel.org/r/20210202122235.26885-1-wu-yan@tcl.com
Fixes: 860c707dca155a56 ("zsmalloc: account the number of compacted pages")
Signed-off-by: Rokudo Yan <wu-yan@tcl.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d7018543842e..a711a2e2a794 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1081,7 +1081,7 @@ static ssize_t mm_stat_show(struct device *dev,
 			zram->limit_pages << PAGE_SHIFT,
 			max_used << PAGE_SHIFT,
 			(u64)atomic64_read(&zram->stats.same_pages),
-			pool_stats.pages_compacted,
+			atomic_long_read(&pool_stats.pages_compacted),
 			(u64)atomic64_read(&zram->stats.huge_pages),
 			(u64)atomic64_read(&zram->stats.huge_pages_since));
 	up_read(&zram->init_lock);
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 4807ca4d52e0..2a430e713ce5 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -35,7 +35,7 @@ enum zs_mapmode {
 
 struct zs_pool_stats {
 	/* How many pages were migrated (freed) */
-	unsigned long pages_compacted;
+	atomic_long_t pages_compacted;
 };
 
 struct zs_pool;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index cf0ed0e4e911..1518732f95c3 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -2212,11 +2212,13 @@ static unsigned long zs_can_compact(struct size_class *class)
 	return obj_wasted * class->pages_per_zspage;
 }
 
-static void __zs_compact(struct zs_pool *pool, struct size_class *class)
+static unsigned long __zs_compact(struct zs_pool *pool,
+				  struct size_class *class)
 {
 	struct zs_compact_control cc;
 	struct zspage *src_zspage;
 	struct zspage *dst_zspage = NULL;
+	unsigned long pages_freed = 0;
 
 	spin_lock(&class->lock);
 	while ((src_zspage = isolate_zspage(class, true))) {
@@ -2246,7 +2248,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
 		putback_zspage(class, dst_zspage);
 		if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
 			free_zspage(pool, class, src_zspage);
-			pool->stats.pages_compacted += class->pages_per_zspage;
+			pages_freed += class->pages_per_zspage;
 		}
 		spin_unlock(&class->lock);
 		cond_resched();
@@ -2257,12 +2259,15 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
 		putback_zspage(class, src_zspage);
 
 	spin_unlock(&class->lock);
+
+	return pages_freed;
 }
 
 unsigned long zs_compact(struct zs_pool *pool)
 {
 	int i;
 	struct size_class *class;
+	unsigned long pages_freed = 0;
 
 	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
 		class = pool->size_class[i];
@@ -2270,10 +2275,11 @@ unsigned long zs_compact(struct zs_pool *pool)
 			continue;
 		if (class->index != i)
 			continue;
-		__zs_compact(pool, class);
+		pages_freed += __zs_compact(pool, class);
 	}
+	atomic_long_add(pages_freed, &pool->stats.pages_compacted);
 
-	return pool->stats.pages_compacted;
+	return pages_freed;
 }
 EXPORT_SYMBOL_GPL(zs_compact);
 
@@ -2290,13 +2296,12 @@ static unsigned long zs_shrinker_scan(struct shrinker *shrinker,
 	struct zs_pool *pool = container_of(shrinker, struct zs_pool,
 			shrinker);
 
-	pages_freed = pool->stats.pages_compacted;
 	/*
 	 * Compact classes and calculate compaction delta.
 	 * Can run concurrently with a manually triggered
 	 * (by user) compaction.
 	 */
-	pages_freed = zs_compact(pool) - pages_freed;
+	pages_freed = zs_compact(pool);
 
 	return pages_freed ? pages_freed : SHRINK_STOP;
 }
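To make the fixed accounting pattern easier to see outside the kernel, here is a minimal, illustrative userspace sketch (not kernel code: C11 atomics and pthreads stand in for atomic_long_t and the shrinker, and names like compact_pool() and compact_one_class() are invented for this example). Each compaction call tallies the pages it frees in a local variable, publishes that tally to the shared lifetime counter with a single atomic add, and returns only its own tally, instead of computing a before/after delta of the shared counter the way the old zs_shrinker_scan() did.

/* compact_sketch.c - illustrative only, not kernel code.
 * Build: cc -std=c11 -pthread compact_sketch.c -o compact_sketch
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Shared lifetime total, like pool->stats.pages_compacted after the patch. */
static atomic_long pages_compacted;

/* Stand-in for one __zs_compact() pass over a size class: pretend we freed
 * 'freeable' pages and report that count to the caller. */
static unsigned long compact_one_class(unsigned long freeable)
{
	return freeable;
}

/* Stand-in for zs_compact(): accumulate locally, publish atomically,
 * and return only what *this* call freed. */
static unsigned long compact_pool(void)
{
	unsigned long pages_freed = 0;

	for (int i = 0; i < 4; i++)
		pages_freed += compact_one_class(i + 1);
	atomic_fetch_add(&pages_compacted, pages_freed);
	return pages_freed;
}

/* Stand-in for zs_shrinker_scan(): the old code read the shared counter
 * before and after and returned the difference, which misattributes pages
 * freed by a concurrent compaction; the fixed pattern just returns the
 * per-call count. */
static void *shrinker_thread(void *arg)
{
	unsigned long freed;

	(void)arg;
	freed = compact_pool();
	printf("this invocation freed %lu pages\n", freed);
	return NULL;
}

int main(void)
{
	pthread_t t[2];

	for (int i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, shrinker_thread, NULL);
	for (int i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	printf("lifetime total: %ld pages\n", atomic_load(&pages_compacted));
	return 0;
}

This mirrors the shape of zs_compact() after the patch: a per-call pages_freed, one atomic_long_add() into pool->stats.pages_compacted, and the per-call value handed back to the shrinker.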
commit 2395928158059b8f9858365fce7713ce7fef62e4 upstream.
There exist multiple paths that may run zram compaction concurrently:
1. auto-compaction triggered during memory reclaim
2. userspace utils writing the zram<id>/compaction node

So, multiple threads may call zs_shrinker_scan/zs_compact concurrently. But pages_compacted is a per-pool zsmalloc variable, and modification of it is not serialized (it is only protected by the per-size-class class->lock). There are two issues here:
1. pages_compacted may not equal the total number of pages freed (due to concurrent additions).
2. zs_shrinker_scan may not return the correct number of pages freed by the current shrinker invocation.

The fix is simple:
1. account the number of pages freed in zs_compact() locally.
2. use the atomic variable pages_compacted to accumulate the total.
Link: https://lkml.kernel.org/r/20210202122235.26885-1-wu-yan@tcl.com
Fixes: 860c707dca155a56 ("zsmalloc: account the number of compacted pages")
Signed-off-by: Rokudo Yan <wu-yan@tcl.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/zram/zram_drv.c |  2 +-
 include/linux/zsmalloc.h      |  2 +-
 mm/zsmalloc.c                 | 17 +++++++++++------
 3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 616ee4f9c233..b243452d4788 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -450,7 +450,7 @@ static ssize_t mm_stat_show(struct device *dev,
 			zram->limit_pages << PAGE_SHIFT,
 			max_used << PAGE_SHIFT,
 			(u64)atomic64_read(&zram->stats.zero_pages),
-			pool_stats.pages_compacted);
+			atomic_long_read(&pool_stats.pages_compacted));
 	up_read(&zram->init_lock);
 
 	return ret;
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 34eb16098a33..05ca2acea8dc 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -36,7 +36,7 @@ enum zs_mapmode {
 
 struct zs_pool_stats {
 	/* How many pages were migrated (freed) */
-	unsigned long pages_compacted;
+	atomic_long_t pages_compacted;
 };
 
 struct zs_pool;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c1ea19478119..8ebcab7b4d2f 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1745,11 +1745,13 @@ static unsigned long zs_can_compact(struct size_class *class)
 	return obj_wasted * class->pages_per_zspage;
 }
 
-static void __zs_compact(struct zs_pool *pool, struct size_class *class)
+static unsigned long __zs_compact(struct zs_pool *pool,
+				  struct size_class *class)
 {
 	struct zs_compact_control cc;
 	struct page *src_page;
 	struct page *dst_page = NULL;
+	unsigned long pages_freed = 0;
 
 	spin_lock(&class->lock);
 	while ((src_page = isolate_source_page(class))) {
@@ -1780,7 +1782,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
 
 		putback_zspage(pool, class, dst_page);
 		if (putback_zspage(pool, class, src_page) == ZS_EMPTY)
-			pool->stats.pages_compacted += class->pages_per_zspage;
+			pages_freed += class->pages_per_zspage;
 		spin_unlock(&class->lock);
 		cond_resched();
 		spin_lock(&class->lock);
@@ -1790,12 +1792,15 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
 		putback_zspage(pool, class, src_page);
 
 	spin_unlock(&class->lock);
+
+	return pages_freed;
 }
 
 unsigned long zs_compact(struct zs_pool *pool)
 {
 	int i;
 	struct size_class *class;
+	unsigned long pages_freed = 0;
 
 	for (i = zs_size_classes - 1; i >= 0; i--) {
 		class = pool->size_class[i];
@@ -1803,10 +1808,11 @@ unsigned long zs_compact(struct zs_pool *pool)
 			continue;
 		if (class->index != i)
 			continue;
-		__zs_compact(pool, class);
+		pages_freed += __zs_compact(pool, class);
 	}
+	atomic_long_add(pages_freed, &pool->stats.pages_compacted);
 
-	return pool->stats.pages_compacted;
+	return pages_freed;
 }
 EXPORT_SYMBOL_GPL(zs_compact);
 
@@ -1823,13 +1829,12 @@ static unsigned long zs_shrinker_scan(struct shrinker *shrinker,
 	struct zs_pool *pool = container_of(shrinker, struct zs_pool,
 			shrinker);
 
-	pages_freed = pool->stats.pages_compacted;
 	/*
 	 * Compact classes and calculate compaction delta.
 	 * Can run concurrently with a manually triggered
 	 * (by user) compaction.
 	 */
-	pages_freed = zs_compact(pool) - pages_freed;
+	pages_freed = zs_compact(pool);
 
 	return pages_freed ? pages_freed : SHRINK_STOP;
 }
On Thu, Mar 4, 2021 at 6:44 PM Rokudo Yan <wu-yan@tcl.com> wrote:
> commit 2395928158059b8f9858365fce7713ce7fef62e4 upstream.
I'm not Greg, but I feel his pain.
I think it would have made it much easier for him, had you explicitly mentioned which stable tree each of your backported fixes is for, so that he doesn't have to guess (or try to match up git blob object ID names from the git patch).
Linus
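As an illustration of that suggestion (an invented subject line, not one taken from this thread), the target tree can be named in the subject or in a note below the "---" marker, e.g.:

    Subject: [PATCH 4.4-stable] zsmalloc: account the number of compacted pages correctly

so the stable maintainers can tell at a glance which queue a backport is meant for; the resend below addresses this by stating the target tree up front.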
commit 2395928158059b8f9858365fce7713ce7fef62e4 backported to the 4.4-stable tree.
There exist multiple paths that may run zram compaction concurrently:
1. auto-compaction triggered during memory reclaim
2. userspace utils writing the zram<id>/compaction node

So, multiple threads may call zs_shrinker_scan/zs_compact concurrently. But pages_compacted is a per-pool zsmalloc variable, and modification of it is not serialized (it is only protected by the per-size-class class->lock). There are two issues here:
1. pages_compacted may not equal the total number of pages freed (due to concurrent additions).
2. zs_shrinker_scan may not return the correct number of pages freed by the current shrinker invocation.

The fix is simple:
1. account the number of pages freed in zs_compact() locally.
2. use the atomic variable pages_compacted to accumulate the total.
Link: https://lkml.kernel.org/r/20210202122235.26885-1-wu-yan@tcl.com
Fixes: 860c707dca155a56 ("zsmalloc: account the number of compacted pages")
Signed-off-by: Rokudo Yan <wu-yan@tcl.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/zram/zram_drv.c |  2 +-
 include/linux/zsmalloc.h      |  2 +-
 mm/zsmalloc.c                 | 17 +++++++++++------
 3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 616ee4f9c233..b243452d4788 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -450,7 +450,7 @@ static ssize_t mm_stat_show(struct device *dev,
 			zram->limit_pages << PAGE_SHIFT,
 			max_used << PAGE_SHIFT,
 			(u64)atomic64_read(&zram->stats.zero_pages),
-			pool_stats.pages_compacted);
+			atomic_long_read(&pool_stats.pages_compacted));
 	up_read(&zram->init_lock);
 
 	return ret;
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 34eb16098a33..05ca2acea8dc 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -36,7 +36,7 @@ enum zs_mapmode {
 
 struct zs_pool_stats {
 	/* How many pages were migrated (freed) */
-	unsigned long pages_compacted;
+	atomic_long_t pages_compacted;
 };
 
 struct zs_pool;
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c1ea19478119..8ebcab7b4d2f 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1745,11 +1745,13 @@ static unsigned long zs_can_compact(struct size_class *class)
 	return obj_wasted * class->pages_per_zspage;
 }
 
-static void __zs_compact(struct zs_pool *pool, struct size_class *class)
+static unsigned long __zs_compact(struct zs_pool *pool,
+				  struct size_class *class)
 {
 	struct zs_compact_control cc;
 	struct page *src_page;
 	struct page *dst_page = NULL;
+	unsigned long pages_freed = 0;
 
 	spin_lock(&class->lock);
 	while ((src_page = isolate_source_page(class))) {
@@ -1780,7 +1782,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
 
 		putback_zspage(pool, class, dst_page);
 		if (putback_zspage(pool, class, src_page) == ZS_EMPTY)
-			pool->stats.pages_compacted += class->pages_per_zspage;
+			pages_freed += class->pages_per_zspage;
 		spin_unlock(&class->lock);
 		cond_resched();
 		spin_lock(&class->lock);
@@ -1790,12 +1792,15 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
 		putback_zspage(pool, class, src_page);
 
 	spin_unlock(&class->lock);
+
+	return pages_freed;
 }
 
 unsigned long zs_compact(struct zs_pool *pool)
 {
 	int i;
 	struct size_class *class;
+	unsigned long pages_freed = 0;
 
 	for (i = zs_size_classes - 1; i >= 0; i--) {
 		class = pool->size_class[i];
@@ -1803,10 +1808,11 @@ unsigned long zs_compact(struct zs_pool *pool)
 			continue;
 		if (class->index != i)
 			continue;
-		__zs_compact(pool, class);
+		pages_freed += __zs_compact(pool, class);
 	}
+	atomic_long_add(pages_freed, &pool->stats.pages_compacted);
 
-	return pool->stats.pages_compacted;
+	return pages_freed;
 }
 EXPORT_SYMBOL_GPL(zs_compact);
 
@@ -1823,13 +1829,12 @@ static unsigned long zs_shrinker_scan(struct shrinker *shrinker,
 	struct zs_pool *pool = container_of(shrinker, struct zs_pool,
 			shrinker);
 
-	pages_freed = pool->stats.pages_compacted;
 	/*
 	 * Compact classes and calculate compaction delta.
	 * Can run concurrently with a manually triggered
 	 * (by user) compaction.
 	 */
-	pages_freed = zs_compact(pool) - pages_freed;
+	pages_freed = zs_compact(pool);
 
 	return pages_freed ? pages_freed : SHRINK_STOP;
 }