From: Joonsoo Kim iamjoonsoo.kim@lge.com
Currently, preventing the CMA area from being used for page allocation is implemented via current_gfp_context(). However, there are two problems with this implementation.

First, it doesn't work for the allocation fastpath. The fastpath uses the original gfp_mask, because current_gfp_context() was introduced to control reclaim and is only applied on the slowpath. Second, clearing __GFP_MOVABLE has the side effect of also excluding memory in ZONE_MOVABLE as an allocation target.

To fix these problems, this patch changes the implementation to exclude the CMA area from page allocation via alloc_flags. alloc_flags is mainly used to control the allocation itself, so it is a good fit for excluding the CMA area.
Fixes: d7fefcc (mm/cma: add PF flag to force non cma alloc)
Cc: stable@vger.kernel.org
Signed-off-by: Joonsoo Kim iamjoonsoo.kim@lge.com
---
 include/linux/sched/mm.h |  4 ----
 mm/page_alloc.c          | 27 +++++++++++++++------------
 2 files changed, 15 insertions(+), 16 deletions(-)
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44ad5b7..a73847a 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -191,10 +191,6 @@ static inline gfp_t current_gfp_context(gfp_t flags)
 			flags &= ~(__GFP_IO | __GFP_FS);
 		else if (pflags & PF_MEMALLOC_NOFS)
 			flags &= ~__GFP_FS;
-#ifdef CONFIG_CMA
-		if (pflags & PF_MEMALLOC_NOCMA)
-			flags &= ~__GFP_MOVABLE;
-#endif
 	}
 	return flags;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6416d08..cd53894 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2791,7 +2791,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 	 * allocating from CMA when over half of the zone's free memory
 	 * is in the CMA area.
 	 */
-	if (migratetype == MIGRATE_MOVABLE &&
+	if (alloc_flags & ALLOC_CMA &&
 	    zone_page_state(zone, NR_FREE_CMA_PAGES) >
 	    zone_page_state(zone, NR_FREE_PAGES) / 2) {
 		page = __rmqueue_cma_fallback(zone, order);
@@ -2802,7 +2802,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 retry:
 	page = __rmqueue_smallest(zone, order, migratetype);
 	if (unlikely(!page)) {
-		if (migratetype == MIGRATE_MOVABLE)
+		if (alloc_flags & ALLOC_CMA)
 			page = __rmqueue_cma_fallback(zone, order);
 
 		if (!page && __rmqueue_fallback(zone, order, migratetype,
@@ -3502,11 +3502,9 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
 	if (likely(!alloc_harder))
 		unusable_free += z->nr_reserved_highatomic;
 
-#ifdef CONFIG_CMA
 	/* If allocation can't use CMA areas don't use free CMA pages */
-	if (!(alloc_flags & ALLOC_CMA))
+	if (IS_ENABLED(CONFIG_CMA) && !(alloc_flags & ALLOC_CMA))
 		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
 
 	return unusable_free;
 }
@@ -3693,6 +3691,16 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
 	return alloc_flags;
 }
 
+static inline void current_alloc_flags(gfp_t gfp_mask,
+					unsigned int *alloc_flags)
+{
+	unsigned int pflags = READ_ONCE(current->flags);
+
+	if (!(pflags & PF_MEMALLOC_NOCMA) &&
+			gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		*alloc_flags |= ALLOC_CMA;
+}
+
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
@@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	struct pglist_data *last_pgdat_dirty_limit = NULL;
 	bool no_fallback;
 
+	current_alloc_flags(gfp_mask, &alloc_flags);
+
 retry:
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
@@ -4339,10 +4349,6 @@ gfp_to_alloc_flags(gfp_mask)
 	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
-#ifdef CONFIG_CMA
-	if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
-		alloc_flags |= ALLOC_CMA;
-#endif
 	return alloc_flags;
 }
 
@@ -4808,9 +4814,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
 	if (should_fail_alloc_page(gfp_mask, order))
 		return false;
 
-	if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
-		*alloc_flags |= ALLOC_CMA;
-
 	return true;
 }
On 7/15/20 7:05 AM, js1304@gmail.com wrote:
From: Joonsoo Kim iamjoonsoo.kim@lge.com
Currently, preventing cma area in page allocation is implemented by using current_gfp_context(). However, there are two problems of this implementation.
First, this doesn't work for allocation fastpath. In the fastpath, original gfp_mask is used since current_gfp_context() is introduced in order to control reclaim and it is on slowpath. Second, clearing __GFP_MOVABLE has a side effect to exclude the memory on the ZONE_MOVABLE for allocation target.
To fix these problems, this patch changes the implementation to exclude cma area in page allocation. Main point of this change is using the alloc_flags. alloc_flags is mainly used to control allocation so it fits for excluding cma area in allocation.
Agreed, should have been done with ALLOC_CMA since the beginning.
Fixes: d7fefcc (mm/cma: add PF flag to force non cma alloc)
More digits please. Fixes: d7fefcc8de91 ("mm/cma: add PF flag to force non cma alloc")
Cc: stable@vger.kernel.org
Signed-off-by: Joonsoo Kim iamjoonsoo.kim@lge.com

 include/linux/sched/mm.h |  4 ----
 mm/page_alloc.c          | 27 +++++++++++++++------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44ad5b7..a73847a 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -191,10 +191,6 @@ static inline gfp_t current_gfp_context(gfp_t flags)
 			flags &= ~(__GFP_IO | __GFP_FS);
 		else if (pflags & PF_MEMALLOC_NOFS)
 			flags &= ~__GFP_FS;
Above this hunk you should also remove PF_MEMALLOC_NOCMA from the test.
-#ifdef CONFIG_CMA
-		if (pflags & PF_MEMALLOC_NOCMA)
-			flags &= ~__GFP_MOVABLE;
-#endif
 	}
 	return flags;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6416d08..cd53894 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2791,7 +2791,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 	 * allocating from CMA when over half of the zone's free memory
 	 * is in the CMA area.
 	 */
-	if (migratetype == MIGRATE_MOVABLE &&
+	if (alloc_flags & ALLOC_CMA &&
 	    zone_page_state(zone, NR_FREE_CMA_PAGES) >
 	    zone_page_state(zone, NR_FREE_PAGES) / 2) {
 		page = __rmqueue_cma_fallback(zone, order);
@@ -2802,7 +2802,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
 retry:
 	page = __rmqueue_smallest(zone, order, migratetype);
 	if (unlikely(!page)) {
-		if (migratetype == MIGRATE_MOVABLE)
+		if (alloc_flags & ALLOC_CMA)
 			page = __rmqueue_cma_fallback(zone, order);
 
 		if (!page && __rmqueue_fallback(zone, order, migratetype,
@@ -3502,11 +3502,9 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
 	if (likely(!alloc_harder))
 		unusable_free += z->nr_reserved_highatomic;
 
-#ifdef CONFIG_CMA
 	/* If allocation can't use CMA areas don't use free CMA pages */
-	if (!(alloc_flags & ALLOC_CMA))
+	if (IS_ENABLED(CONFIG_CMA) && !(alloc_flags & ALLOC_CMA))
 		unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
 
 	return unusable_free;
 }
@@ -3693,6 +3691,16 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
 	return alloc_flags;
 }
 
+static inline void current_alloc_flags(gfp_t gfp_mask,
+					unsigned int *alloc_flags)
+{
+	unsigned int pflags = READ_ONCE(current->flags);
+
+	if (!(pflags & PF_MEMALLOC_NOCMA) &&
+			gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		*alloc_flags |= ALLOC_CMA;
+}
+
I don't like the modification through parameter, would just do what current_gfp_context() does and return the modified value. Also make it a no-op (including no READ_ONCE(current->flags)) if !CONFIG_CMA, please.
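Something like this untested sketch is what I have in mind (exact shape up to you):

static inline unsigned int current_alloc_flags(gfp_t gfp_mask,
					unsigned int alloc_flags)
{
#ifdef CONFIG_CMA
	/* untested sketch: adjust and return the flags rather than modifying
	 * them through a pointer; the whole body goes away without CONFIG_CMA */
	unsigned int pflags = READ_ONCE(current->flags);

	if (!(pflags & PF_MEMALLOC_NOCMA) &&
			gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
		alloc_flags |= ALLOC_CMA;
#endif
	return alloc_flags;
}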
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
@@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 	struct pglist_data *last_pgdat_dirty_limit = NULL;
 	bool no_fallback;
 
+	current_alloc_flags(gfp_mask, &alloc_flags);
+
I don't see why to move the test here? It will still be executed in the fastpath, if that's what you wanted to avoid.
 retry:
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
@@ -4339,10 +4349,6 @@ gfp_to_alloc_flags(gfp_mask)
 	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
-#ifdef CONFIG_CMA
-	if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
-		alloc_flags |= ALLOC_CMA;
-#endif
I would just replace this here with: alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
 	return alloc_flags;
 }
 
@@ -4808,9 +4814,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
 	if (should_fail_alloc_page(gfp_mask, order))
 		return false;
 
-	if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
-		*alloc_flags |= ALLOC_CMA;
And same here... Ah, I see. current_alloc_flags() should probably take a migratetype parameter instead of gfp_mask then.
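I.e., roughly (again only a sketch), the helper would become:

static inline unsigned int current_alloc_flags(int migratetype,
					unsigned int alloc_flags)
{
#ifdef CONFIG_CMA
	/* sketch: callers pass the migratetype they already have */
	if (migratetype == MIGRATE_MOVABLE &&
	    !(READ_ONCE(current->flags) & PF_MEMALLOC_NOCMA))
		alloc_flags |= ALLOC_CMA;
#endif
	return alloc_flags;
}

so gfp_to_alloc_flags() can pass gfp_migratetype(gfp_mask) and prepare_alloc_pages() can pass ac->migratetype.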
-
 	return true;
 }
On Wed, Jul 15, 2020 at 5:24 PM, Vlastimil Babka vbabka@suse.cz wrote:
On 7/15/20 7:05 AM, js1304@gmail.com wrote:
From: Joonsoo Kim iamjoonsoo.kim@lge.com
Currently, preventing cma area in page allocation is implemented by using current_gfp_context(). However, there are two problems of this implementation.
First, this doesn't work for allocation fastpath. In the fastpath, original gfp_mask is used since current_gfp_context() is introduced in order to control reclaim and it is on slowpath. Second, clearing __GFP_MOVABLE has a side effect to exclude the memory on the ZONE_MOVABLE for allocation target.
To fix these problems, this patch changes the implementation to exclude cma area in page allocation. Main point of this change is using the alloc_flags. alloc_flags is mainly used to control allocation so it fits for excluding cma area in allocation.
Agreed, should have been done with ALLOC_CMA since the beginning.
Fixes: d7fefcc (mm/cma: add PF flag to force non cma alloc)
More digits please. Fixes: d7fefcc8de91 ("mm/cma: add PF flag to force non cma alloc")
Will do.
Cc: stable@vger.kernel.org Signed-off-by: Joonsoo Kim iamjoonsoo.kim@lge.com
include/linux/sched/mm.h | 4 ---- mm/page_alloc.c | 27 +++++++++++++++------------ 2 files changed, 15 insertions(+), 16 deletions(-)
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 44ad5b7..a73847a 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -191,10 +191,6 @@ static inline gfp_t current_gfp_context(gfp_t flags) flags &= ~(__GFP_IO | __GFP_FS); else if (pflags & PF_MEMALLOC_NOFS) flags &= ~__GFP_FS;
Above this hunk you should also remove PF_MEMALLOC_NOCMA from the test.
Will do.
-#ifdef CONFIG_CMA
if (pflags & PF_MEMALLOC_NOCMA)
flags &= ~__GFP_MOVABLE;
-#endif } return flags; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6416d08..cd53894 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2791,7 +2791,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, * allocating from CMA when over half of the zone's free memory * is in the CMA area. */
if (migratetype == MIGRATE_MOVABLE &&
if (alloc_flags & ALLOC_CMA && zone_page_state(zone, NR_FREE_CMA_PAGES) > zone_page_state(zone, NR_FREE_PAGES) / 2) { page = __rmqueue_cma_fallback(zone, order);
@@ -2802,7 +2802,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, retry: page = __rmqueue_smallest(zone, order, migratetype); if (unlikely(!page)) {
if (migratetype == MIGRATE_MOVABLE)
if (alloc_flags & ALLOC_CMA) page = __rmqueue_cma_fallback(zone, order); if (!page && __rmqueue_fallback(zone, order, migratetype,
@@ -3502,11 +3502,9 @@ static inline long __zone_watermark_unusable_free(struct zone *z, if (likely(!alloc_harder)) unusable_free += z->nr_reserved_highatomic;
-#ifdef CONFIG_CMA /* If allocation can't use CMA areas don't use free CMA pages */
if (!(alloc_flags & ALLOC_CMA))
if (IS_ENABLED(CONFIG_CMA) && !(alloc_flags & ALLOC_CMA)) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
return unusable_free;
} @@ -3693,6 +3691,16 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) return alloc_flags; }
+static inline void current_alloc_flags(gfp_t gfp_mask,
unsigned int *alloc_flags)
+{
unsigned int pflags = READ_ONCE(current->flags);
if (!(pflags & PF_MEMALLOC_NOCMA) &&
gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
*alloc_flags |= ALLOC_CMA;
+}
I don't like the modification through parameter, would just do what current_gfp_context() does and return the modified value. Also make it a no-op (including no READ_ONCE(current->flags)) if !CONFIG_CMA, please.
Okay.
/*
- get_page_from_freelist goes through the zonelist trying to allocate
- a page.
@@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, struct pglist_data *last_pgdat_dirty_limit = NULL; bool no_fallback;
current_alloc_flags(gfp_mask, &alloc_flags);
I don't see why to move the test here? It will still be executed in the fastpath, if that's what you wanted to avoid.
I want to execute it on the fastpath, too. The reason I moved it here is that alloc_flags can be reset on the slowpath. See the code around __gfp_pfmemalloc_flags(). This is the only place where I can apply this option to all the allocation paths at once.
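For reference, the slowpath currently does roughly the following, so any ALLOC_CMA decided earlier is lost when alloc_flags is overwritten:

	/* in __alloc_pages_slowpath(), roughly: */
	reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
	if (reserve_flags)
		alloc_flags = reserve_flags;	/* ALLOC_CMA set earlier is dropped here */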
Thanks.
retry: /* * Scan zonelist, looking for a zone with enough free. @@ -4339,10 +4349,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask) } else if (unlikely(rt_task(current)) && !in_interrupt()) alloc_flags |= ALLOC_HARDER;
-#ifdef CONFIG_CMA
if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
-#endif
I would just replace this here with: alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
return alloc_flags;
}
@@ -4808,9 +4814,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, if (should_fail_alloc_page(gfp_mask, order)) return false;
if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
*alloc_flags |= ALLOC_CMA;
And same here... Ah, I see. current_alloc_flags() should probably take a migratetype parameter instead of gfp_mask then.
return true;
}
On 7/16/20 9:27 AM, Joonsoo Kim wrote:
On Wed, Jul 15, 2020 at 5:24 PM, Vlastimil Babka vbabka@suse.cz wrote:
/*
- get_page_from_freelist goes through the zonelist trying to allocate
- a page.
@@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, struct pglist_data *last_pgdat_dirty_limit = NULL; bool no_fallback;
current_alloc_flags(gfp_mask, &alloc_flags);
I don't see why to move the test here? It will still be executed in the fastpath, if that's what you wanted to avoid.
I want to execute it on the fastpath, too. Reason that I moved it here is that alloc_flags could be reset on slowpath. See the code where __gfp_pfmemalloc_flags() is on. This is the only place that I can apply this option to all the allocation paths at once.
But get_page_from_freelist() might be called multiple times in the slowpath, and also anyone looking for gfp and alloc flags setup will likely not examine this function. I don't see a problem in having it in two places that already deal with alloc_flags setup, as it is now.
Thanks.
retry: /* * Scan zonelist, looking for a zone with enough free. @@ -4339,10 +4349,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask) } else if (unlikely(rt_task(current)) && !in_interrupt()) alloc_flags |= ALLOC_HARDER;
-#ifdef CONFIG_CMA
if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
-#endif
I would just replace this here with: alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
return alloc_flags;
}
@@ -4808,9 +4814,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, if (should_fail_alloc_page(gfp_mask, order)) return false;
if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
*alloc_flags |= ALLOC_CMA;
And same here... Ah, I see. current_alloc_flags() should probably take a migratetype parameter instead of gfp_mask then.
return true;
}
On Thu, Jul 16, 2020 at 4:45 PM, Vlastimil Babka vbabka@suse.cz wrote:
On 7/16/20 9:27 AM, Joonsoo Kim wrote:
On Wed, Jul 15, 2020 at 5:24 PM, Vlastimil Babka vbabka@suse.cz wrote:
/*
- get_page_from_freelist goes through the zonelist trying to allocate
- a page.
@@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, struct pglist_data *last_pgdat_dirty_limit = NULL; bool no_fallback;
current_alloc_flags(gfp_mask, &alloc_flags);
I don't see why to move the test here? It will still be executed in the fastpath, if that's what you wanted to avoid.
I want to execute it on the fastpath, too. Reason that I moved it here is that alloc_flags could be reset on slowpath. See the code where __gfp_pfmemalloc_flags() is on. This is the only place that I can apply this option to all the allocation paths at once.
But get_page_from_freelist() might be called multiple times in the slowpath, and also anyone looking for gfp and alloc flags setup will likely not examine this function. I don't see a problem in having it in two places that already deal with alloc_flags setup, as it is now.
I agree that anyone looking at the alloc flags setup would easily miss that function. Okay, I will place it back in its original places, although we now need to add one more place. The *three places* are gfp_to_alloc_flags(), prepare_alloc_pages() and __gfp_pfmemalloc_flags().
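Roughly (just a sketch, not the final patch), the call sites would be:

	/* 1) prepare_alloc_pages(): covers the fastpath */
	*alloc_flags = current_alloc_flags(gfp_mask, *alloc_flags);

	/* 2) gfp_to_alloc_flags(): covers the normal slowpath retries */
	alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);

	/* 3) __alloc_pages_slowpath(): re-apply it when alloc_flags is
	 *    replaced by the reserve flags */
	reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
	if (reserve_flags)
		alloc_flags = current_alloc_flags(gfp_mask, reserve_flags);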
Thanks.
On 7/17/20 9:29 AM, Joonsoo Kim wrote:
On Thu, Jul 16, 2020 at 4:45 PM, Vlastimil Babka vbabka@suse.cz wrote:
On 7/16/20 9:27 AM, Joonsoo Kim wrote:
On Wed, Jul 15, 2020 at 5:24 PM, Vlastimil Babka vbabka@suse.cz wrote:
/*
- get_page_from_freelist goes through the zonelist trying to allocate
- a page.
@@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, struct pglist_data *last_pgdat_dirty_limit = NULL; bool no_fallback;
current_alloc_flags(gfp_mask, &alloc_flags);
I don't see why to move the test here? It will still be executed in the fastpath, if that's what you wanted to avoid.
I want to execute it on the fastpath, too. Reason that I moved it here is that alloc_flags could be reset on slowpath. See the code where __gfp_pfmemalloc_flags() is on. This is the only place that I can apply this option to all the allocation paths at once.
But get_page_from_freelist() might be called multiple times in the slowpath, and also anyone looking for gfp and alloc flags setup will likely not examine this function. I don't see a problem in having it in two places that already deal with alloc_flags setup, as it is now.
I agree that anyone looking alloc flags will miss that function easily. Okay. I will place it on its original place, although we now need to add one more place. *Three places* are gfp_to_alloc_flags(), prepare_alloc_pages() and __gfp_pfmemalloc_flags().
Hm the check below should also work for ALLOC_OOM|ALLOC_NOCMA then.
	/* Avoid allocations with no watermarks from looping endlessly */
	if (tsk_is_oom_victim(current) &&
	    (alloc_flags == ALLOC_OOM ||
	     (gfp_mask & __GFP_NOMEMALLOC)))
		goto nopage;
Maybe it's simpler to change get_page_from_freelist() then. But document well.
Thanks.
On 7/17/20 10:10 AM, Vlastimil Babka wrote:
On 7/17/20 9:29 AM, Joonsoo Kim wrote:
On Thu, Jul 16, 2020 at 4:45 PM, Vlastimil Babka vbabka@suse.cz wrote:
On 7/16/20 9:27 AM, Joonsoo Kim wrote:
On Wed, Jul 15, 2020 at 5:24 PM, Vlastimil Babka vbabka@suse.cz wrote:
/*
- get_page_from_freelist goes through the zonelist trying to allocate
- a page.
@@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, struct pglist_data *last_pgdat_dirty_limit = NULL; bool no_fallback;
current_alloc_flags(gfp_mask, &alloc_flags);
I don't see why to move the test here? It will still be executed in the fastpath, if that's what you wanted to avoid.
I want to execute it on the fastpath, too. Reason that I moved it here is that alloc_flags could be reset on slowpath. See the code where __gfp_pfmemalloc_flags() is on. This is the only place that I can apply this option to all the allocation paths at once.
But get_page_from_freelist() might be called multiple times in the slowpath, and also anyone looking for gfp and alloc flags setup will likely not examine this function. I don't see a problem in having it in two places that already deal with alloc_flags setup, as it is now.
I agree that anyone looking alloc flags will miss that function easily. Okay. I will place it on its original place, although we now need to add one more place. *Three places* are gfp_to_alloc_flags(), prepare_alloc_pages() and __gfp_pfmemalloc_flags().
Hm the check below should also work for ALLOC_OOM|ALLOC_NOCMA then.
/* Avoid allocations with no watermarks from looping endlessly */ if (tsk_is_oom_victim(current) && (alloc_flags == ALLOC_OOM || (gfp_mask & __GFP_NOMEMALLOC))) goto nopage;
Maybe it's simpler to change get_page_from_freelist() then. But document well.
But then we have e.g. should_reclaim_retry(), which calls __zone_watermark_ok(), where ALLOC_CMA plays a role too, so that means we should have alloc_flags set up correctly wrt ALLOC_CMA at the __alloc_pages_slowpath() level...
Thanks.
On Fri, Jul 17, 2020 at 5:15 PM, Vlastimil Babka vbabka@suse.cz wrote:
On 7/17/20 10:10 AM, Vlastimil Babka wrote:
On 7/17/20 9:29 AM, Joonsoo Kim wrote:
On Thu, Jul 16, 2020 at 4:45 PM, Vlastimil Babka vbabka@suse.cz wrote:
On 7/16/20 9:27 AM, Joonsoo Kim wrote:
On Wed, Jul 15, 2020 at 5:24 PM, Vlastimil Babka vbabka@suse.cz wrote:
> /* > * get_page_from_freelist goes through the zonelist trying to allocate > * a page. > @@ -3706,6 +3714,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, > struct pglist_data *last_pgdat_dirty_limit = NULL; > bool no_fallback; > > + current_alloc_flags(gfp_mask, &alloc_flags);
I don't see why to move the test here? It will still be executed in the fastpath, if that's what you wanted to avoid.
I want to execute it on the fastpath, too. Reason that I moved it here is that alloc_flags could be reset on slowpath. See the code where __gfp_pfmemalloc_flags() is on. This is the only place that I can apply this option to all the allocation paths at once.
But get_page_from_freelist() might be called multiple times in the slowpath, and also anyone looking for gfp and alloc flags setup will likely not examine this function. I don't see a problem in having it in two places that already deal with alloc_flags setup, as it is now.
I agree that anyone looking alloc flags will miss that function easily. Okay. I will place it on its original place, although we now need to add one more place. *Three places* are gfp_to_alloc_flags(), prepare_alloc_pages() and __gfp_pfmemalloc_flags().
Hm the check below should also work for ALLOC_OOM|ALLOC_NOCMA then.
/* Avoid allocations with no watermarks from looping endlessly */ if (tsk_is_oom_victim(current) && (alloc_flags == ALLOC_OOM || (gfp_mask & __GFP_NOMEMALLOC))) goto nopage;
Maybe it's simpler to change get_page_from_freelist() then. But document well.
But then we have e.g. should_reclaim_retry() which calls __zone_watermark_ok() where ALLOC_CMA plays a role too, so that means we should have alloc_mask set up correctly wrt ALLOC_CMA at the __alloc_pages_slowpath() level...
Good catch! Hmm... okay, it will need to be handled in three places then. I will fix it in the next version. Anyway, we need some clean-up of the alloc_flags handling, since it is not great for maintainability as it stands.
Thanks.
On Wed 15-07-20 14:05:26, Joonsoo Kim wrote:
From: Joonsoo Kim iamjoonsoo.kim@lge.com
Currently, preventing cma area in page allocation is implemented by using current_gfp_context(). However, there are two problems of this implementation.
First, this doesn't work for allocation fastpath. In the fastpath, original gfp_mask is used since current_gfp_context() is introduced in order to control reclaim and it is on slowpath. Second, clearing __GFP_MOVABLE has a side effect to exclude the memory on the ZONE_MOVABLE for allocation target.
This can be especially a problem with movable_node configurations where a large portion of the memory is in movable zones.
To fix these problems, this patch changes the implementation to exclude cma area in page allocation. Main point of this change is using the alloc_flags. alloc_flags is mainly used to control allocation so it fits for excluding cma area in allocation.
The approach is sensible and the patch makes sense to me from a quick glance, but I am not really familiar with all the subtle details of CMA integration with the allocator, so I do not feel confident enough to provide my ack.
Thanks!
From: js1304@gmail.com
Sent: 15 July 2020 06:05
From: Joonsoo Kim iamjoonsoo.kim@lge.com
Currently, preventing cma area in page allocation is implemented by using current_gfp_context(). However, there are two problems of this implementation.
...
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6416d08..cd53894 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c
...
@@ -3693,6 +3691,16 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
 	return alloc_flags;
 }
 
+static inline void current_alloc_flags(gfp_t gfp_mask,
+					unsigned int *alloc_flags)
+{
+	unsigned int pflags = READ_ONCE(current->flags);
+
+	if (!(pflags & PF_MEMALLOC_NOCMA) &&
+			gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+		*alloc_flags |= ALLOC_CMA;
+}
I'd guess this would be easier to understand, and probably generate better code, if it were renamed and used as:
	alloc_flags |= can_alloc_cma(gfp_mask);

Given it is a 'static inline', the compiler might end up generating the same code either way. If you need to clear a flag as well, doing:
	alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
is much better for a non-inlined function.
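i.e. something along these lines (just a sketch, names illustrative):

static inline unsigned int can_alloc_cma(gfp_t gfp_mask)
{
#ifdef CONFIG_CMA
	/* sketch: return the extra alloc flag (or 0) instead of editing it */
	if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE &&
	    !(READ_ONCE(current->flags) & PF_MEMALLOC_NOCMA))
		return ALLOC_CMA;
#endif
	return 0;
}

	alloc_flags |= can_alloc_cma(gfp_mask);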
David
On Fri, Jul 17, 2020 at 5:32 PM, David Laight David.Laight@aculab.com wrote:
From: js1304@gmail.com
Sent: 15 July 2020 06:05 From: Joonsoo Kim iamjoonsoo.kim@lge.com
Currently, preventing cma area in page allocation is implemented by using current_gfp_context(). However, there are two problems of this implementation.
...
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6416d08..cd53894 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c
...
@@ -3693,6 +3691,16 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) return alloc_flags; }
+static inline void current_alloc_flags(gfp_t gfp_mask,
unsigned int *alloc_flags)
+{
unsigned int pflags = READ_ONCE(current->flags);
if (!(pflags & PF_MEMALLOC_NOCMA) &&
gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
*alloc_flags |= ALLOC_CMA;
+}
I'd guess this would be easier to understand and probably generate better code if renamed and used as: alloc_flags |= can_alloc_cma(gpf_mask);
Given it is a 'static inline' the compiler might end up generating the same code. If you needed to clear a flag doing: alloc_flags = current_alloc_flags(gpf_mask, alloc_flags); is much better for non-inlined function.
Vlastimil suggested this way and I have agreed with that. Anyway, thanks for the suggestion.
Thanks.