On 12/11/24 06:30, Matthew Wilcox (Oracle) wrote:
Today we account each page individually to the memcg, which works well enough, if a little inefficiently (N atomic operations per page instead of N per allocation). Unfortunately, the stats can get out of sync when i915 calls vmap() with VM_MAP_PUT_PAGES. The pages being passed were not allocated by vmalloc, so the MEMCG_VMALLOC counter was never incremented. But it is decremented when the pages are freed with vfree().
Solve all of this by tracking the memcg at the vm_struct level. This logic has to live in the memcontrol file as it calls several functions which are currently static.
Fixes: b944afc9d64d (mm: add a VM_MAP_PUT_PAGES flag for vmap) Cc: stable@vger.kernel.org Signed-off-by: Matthew Wilcox (Oracle) willy@infradead.org
include/linux/memcontrol.h | 7 ++++++ include/linux/vmalloc.h | 3 +++ mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++++++ mm/vmalloc.c | 14 ++++++------ 4 files changed, 63 insertions(+), 7 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5502aa8e138e..83ebcadebba6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1676,6 +1676,10 @@ static inline struct obj_cgroup *get_obj_cgroup_from_current(void) int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size); void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); +int obj_cgroup_charge_vmalloc(struct obj_cgroup **objcgp,
unsigned int nr_pages, gfp_t gfp);
+void obj_cgroup_uncharge_vmalloc(struct obj_cgroup *objcgp,
unsigned int nr_pages);
extern struct static_key_false memcg_bpf_enabled_key; static inline bool memcg_bpf_enabled(void) @@ -1756,6 +1760,9 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order) { } +/* Must be macros to avoid dereferencing objcg in vm_struct */ +#define obj_cgroup_charge_vmalloc(objcgp, nr_pages, gfp) 0 +#define obj_cgroup_uncharge_vmalloc(objcg, nr_pages) do { } while (0) static inline struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio) { return NULL; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 31e9ffd936e3..ec7c2d607382 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -60,6 +60,9 @@ struct vm_struct { #endif unsigned int nr_pages; phys_addr_t phys_addr; +#ifdef CONFIG_MEMCG
- struct obj_cgroup *objcg;
+#endif const void *caller; }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7b3503d12aaf..629bffc3e26d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5472,4 +5472,50 @@ static int __init mem_cgroup_swap_init(void) } subsys_initcall(mem_cgroup_swap_init); +/**
- obj_cgroup_charge_vmalloc - Charge vmalloc memory
- @objcgp: Pointer to an object cgroup
- @nr_pages: Number of pages
- @gfp: Memory allocation flags
- Return: 0 on success, negative errno on failure.
- */
+int obj_cgroup_charge_vmalloc(struct obj_cgroup **objcgp,
unsigned int nr_pages, gfp_t gfp)
+{
- struct obj_cgroup *objcg;
- int err;
Are we also responsible for setting *objcgp to NULL for return conditions (when we return before setting it)?
- if (mem_cgroup_disabled() || !(gfp & __GFP_ACCOUNT))
return 0;
Don't we want !memcg_kmem_online() instead of mem_cgroup_disabled()?
- objcg = current_obj_cgroup();
- if (!objcg)
return 0;
- err = obj_cgroup_charge_pages(objcg, gfp, nr_pages);
- if (err)
return err;
- obj_cgroup_get(objcg);
- mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_VMALLOC, nr_pages);
- *objcgp = objcg;
- return 0;
+}
<snip>
Balbir Singh