The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f2f29da9f0d4367f6ff35e0d9d021257bb53e273
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025041748-cosmetics-maritime-e503@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f2f29da9f0d4367f6ff35e0d9d021257bb53e273 Mon Sep 17 00:00:00 2001
From: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Date: Sun, 6 Apr 2025 11:45:22 +0700
Subject: [PATCH] x86/e820: Fix handling of subpage regions when calculating
nosave ranges in e820__register_nosave_regions()
While debugging kexec/hibernation hangs and crashes, it turned out that
the current implementation of e820__register_nosave_regions() suffers from
multiple serious issues:
- The end of the last region is tracked by PFN, causing it to find holes
that aren't there if two consecutive subpage regions are present
- The nosave PFN ranges derived from holes are rounded out (instead of
rounded in) which makes it inconsistent with how explicitly reserved
regions are handled
Fix this by:
- Treating reserved regions as if they were holes, to ensure consistent
handling (rounding out nosave PFN ranges is more correct as the
kernel does not use partial pages)
- Tracking the end of the last RAM region by address instead of pages
to detect holes more precisely
These bugs appear to have been introduced about ~18 years ago with the very
first version of e820_mark_nosave_regions(), and its flawed assumptions were
carried forward uninterrupted through various waves of rewrites and renames.
[ mingo: Added Git archeology details, for kicks and giggles. ]
Fixes: e8eff5ac294e ("[PATCH] Make swsusp avoid memory holes and reserved memory regions on x86_64")
Reported-by: Roberto Ricci <io@r-ricci.it>
Tested-by: Roberto Ricci <io@r-ricci.it>
Signed-off-by: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Len Brown <len.brown@intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20250406-fix-e820-nosave-v3-1-f3787bc1ee1d@qtmlab…
Closes: https://lore.kernel.org/all/Z4WFjBVHpndct7br@desktop0a/
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 57120f0749cc..9d8dd8deb2a7 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
void __init e820__register_nosave_regions(unsigned long limit_pfn)
{
int i;
- unsigned long pfn = 0;
+ u64 last_addr = 0;
for (i = 0; i < e820_table->nr_entries; i++) {
struct e820_entry *entry = &e820_table->entries[i];
- if (pfn < PFN_UP(entry->addr))
- register_nosave_region(pfn, PFN_UP(entry->addr));
-
- pfn = PFN_DOWN(entry->addr + entry->size);
-
if (entry->type != E820_TYPE_RAM)
- register_nosave_region(PFN_UP(entry->addr), pfn);
+ continue;
- if (pfn >= limit_pfn)
- break;
+ if (last_addr < entry->addr)
+ register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr));
+
+ last_addr = entry->addr + entry->size;
}
+
+ register_nosave_region(PFN_DOWN(last_addr), limit_pfn);
}
#ifdef CONFIG_ACPI
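For context, the rounding problem described above can be reproduced in a few
lines of userspace C. This is only an illustrative sketch, not part of the
patch: the two subpage RAM entries, the 4 KiB page size and the local
PFN_UP()/PFN_DOWN() mimics of the kernel macros are assumptions made for the
example.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT  12
#define PFN_UP(x)   (((x) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

int main(void)
{
	/* Two back-to-back subpage RAM entries sharing one 4 KiB page:
	 * entry A covers [0x1000, 0x1800), entry B covers [0x1800, 0x2000). */
	uint64_t a_end = 0x1800, b_start = 0x1800;

	/* The old code tracked the end of A as PFN_DOWN(0x1800) = 1 and the
	 * start of B as PFN_UP(0x1800) = 2, so it registered [PFN 1, PFN 2)
	 * as a nosave hole even though A and B together cover that page. */
	printf("old: phantom hole? %s\n",
	       PFN_DOWN(a_end) < PFN_UP(b_start) ? "yes" : "no");

	/* The new code compares raw addresses, so no hole is reported. */
	printf("new: phantom hole? %s\n", a_end < b_start ? "yes" : "no");
	return 0;
}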
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x f2f29da9f0d4367f6ff35e0d9d021257bb53e273
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025041744-grill-debit-4fe2@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f2f29da9f0d4367f6ff35e0d9d021257bb53e273 Mon Sep 17 00:00:00 2001
From: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Date: Sun, 6 Apr 2025 11:45:22 +0700
Subject: [PATCH] x86/e820: Fix handling of subpage regions when calculating
nosave ranges in e820__register_nosave_regions()
While debugging kexec/hibernation hangs and crashes, it turned out that
the current implementation of e820__register_nosave_regions() suffers from
multiple serious issues:
- The end of the last region is tracked by PFN, causing it to find holes
that aren't there if two consecutive subpage regions are present
- The nosave PFN ranges derived from holes are rounded out (instead of
rounded in) which makes it inconsistent with how explicitly reserved
regions are handled
Fix this by:
- Treating reserved regions as if they were holes, to ensure consistent
handling (rounding out nosave PFN ranges is more correct as the
kernel does not use partial pages)
- Tracking the end of the last RAM region by address instead of pages
to detect holes more precisely
These bugs appear to have been introduced about ~18 years ago with the very
first version of e820_mark_nosave_regions(), and its flawed assumptions were
carried forward uninterrupted through various waves of rewrites and renames.
[ mingo: Added Git archeology details, for kicks and giggles. ]
Fixes: e8eff5ac294e ("[PATCH] Make swsusp avoid memory holes and reserved memory regions on x86_64")
Reported-by: Roberto Ricci <io@r-ricci.it>
Tested-by: Roberto Ricci <io@r-ricci.it>
Signed-off-by: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Len Brown <len.brown@intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20250406-fix-e820-nosave-v3-1-f3787bc1ee1d@qtmlab…
Closes: https://lore.kernel.org/all/Z4WFjBVHpndct7br@desktop0a/
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 57120f0749cc..9d8dd8deb2a7 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
void __init e820__register_nosave_regions(unsigned long limit_pfn)
{
int i;
- unsigned long pfn = 0;
+ u64 last_addr = 0;
for (i = 0; i < e820_table->nr_entries; i++) {
struct e820_entry *entry = &e820_table->entries[i];
- if (pfn < PFN_UP(entry->addr))
- register_nosave_region(pfn, PFN_UP(entry->addr));
-
- pfn = PFN_DOWN(entry->addr + entry->size);
-
if (entry->type != E820_TYPE_RAM)
- register_nosave_region(PFN_UP(entry->addr), pfn);
+ continue;
- if (pfn >= limit_pfn)
- break;
+ if (last_addr < entry->addr)
+ register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr));
+
+ last_addr = entry->addr + entry->size;
}
+
+ register_nosave_region(PFN_DOWN(last_addr), limit_pfn);
}
#ifdef CONFIG_ACPI
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x f2f29da9f0d4367f6ff35e0d9d021257bb53e273
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025041739-serrated-neatly-8262@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f2f29da9f0d4367f6ff35e0d9d021257bb53e273 Mon Sep 17 00:00:00 2001
From: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Date: Sun, 6 Apr 2025 11:45:22 +0700
Subject: [PATCH] x86/e820: Fix handling of subpage regions when calculating
nosave ranges in e820__register_nosave_regions()
While debugging kexec/hibernation hangs and crashes, it turned out that
the current implementation of e820__register_nosave_regions() suffers from
multiple serious issues:
- The end of the last region is tracked by PFN, causing it to find holes
that aren't there if two consecutive subpage regions are present
- The nosave PFN ranges derived from holes are rounded out (instead of
rounded in) which makes it inconsistent with how explicitly reserved
regions are handled
Fix this by:
- Treating reserved regions as if they were holes, to ensure consistent
handling (rounding out nosave PFN ranges is more correct as the
kernel does not use partial pages)
- Tracking the end of the last RAM region by address instead of pages
to detect holes more precisely
These bugs appear to have been introduced about ~18 years ago with the very
first version of e820_mark_nosave_regions(), and its flawed assumptions were
carried forward uninterrupted through various waves of rewrites and renames.
[ mingo: Added Git archeology details, for kicks and giggles. ]
Fixes: e8eff5ac294e ("[PATCH] Make swsusp avoid memory holes and reserved memory regions on x86_64")
Reported-by: Roberto Ricci <io@r-ricci.it>
Tested-by: Roberto Ricci <io@r-ricci.it>
Signed-off-by: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Len Brown <len.brown@intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20250406-fix-e820-nosave-v3-1-f3787bc1ee1d@qtmlab…
Closes: https://lore.kernel.org/all/Z4WFjBVHpndct7br@desktop0a/
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 57120f0749cc..9d8dd8deb2a7 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
void __init e820__register_nosave_regions(unsigned long limit_pfn)
{
int i;
- unsigned long pfn = 0;
+ u64 last_addr = 0;
for (i = 0; i < e820_table->nr_entries; i++) {
struct e820_entry *entry = &e820_table->entries[i];
- if (pfn < PFN_UP(entry->addr))
- register_nosave_region(pfn, PFN_UP(entry->addr));
-
- pfn = PFN_DOWN(entry->addr + entry->size);
-
if (entry->type != E820_TYPE_RAM)
- register_nosave_region(PFN_UP(entry->addr), pfn);
+ continue;
- if (pfn >= limit_pfn)
- break;
+ if (last_addr < entry->addr)
+ register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr));
+
+ last_addr = entry->addr + entry->size;
}
+
+ register_nosave_region(PFN_DOWN(last_addr), limit_pfn);
}
#ifdef CONFIG_ACPI
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x f2f29da9f0d4367f6ff35e0d9d021257bb53e273
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025041735-reflector-backdrop-ceb9@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f2f29da9f0d4367f6ff35e0d9d021257bb53e273 Mon Sep 17 00:00:00 2001
From: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Date: Sun, 6 Apr 2025 11:45:22 +0700
Subject: [PATCH] x86/e820: Fix handling of subpage regions when calculating
nosave ranges in e820__register_nosave_regions()
While debugging kexec/hibernation hangs and crashes, it turned out that
the current implementation of e820__register_nosave_regions() suffers from
multiple serious issues:
- The end of the last region is tracked by PFN, causing it to find holes
that aren't there if two consecutive subpage regions are present
- The nosave PFN ranges derived from holes are rounded out (instead of
rounded in) which makes it inconsistent with how explicitly reserved
regions are handled
Fix this by:
- Treating reserved regions as if they were holes, to ensure consistent
handling (rounding out nosave PFN ranges is more correct as the
kernel does not use partial pages)
- Tracking the end of the last RAM region by address instead of pages
to detect holes more precisely
These bugs appear to have been introduced about ~18 years ago with the very
first version of e820_mark_nosave_regions(), and its flawed assumptions were
carried forward uninterrupted through various waves of rewrites and renames.
[ mingo: Added Git archeology details, for kicks and giggles. ]
Fixes: e8eff5ac294e ("[PATCH] Make swsusp avoid memory holes and reserved memory regions on x86_64")
Reported-by: Roberto Ricci <io@r-ricci.it>
Tested-by: Roberto Ricci <io@r-ricci.it>
Signed-off-by: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Len Brown <len.brown@intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20250406-fix-e820-nosave-v3-1-f3787bc1ee1d@qtmlab…
Closes: https://lore.kernel.org/all/Z4WFjBVHpndct7br@desktop0a/
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 57120f0749cc..9d8dd8deb2a7 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
void __init e820__register_nosave_regions(unsigned long limit_pfn)
{
int i;
- unsigned long pfn = 0;
+ u64 last_addr = 0;
for (i = 0; i < e820_table->nr_entries; i++) {
struct e820_entry *entry = &e820_table->entries[i];
- if (pfn < PFN_UP(entry->addr))
- register_nosave_region(pfn, PFN_UP(entry->addr));
-
- pfn = PFN_DOWN(entry->addr + entry->size);
-
if (entry->type != E820_TYPE_RAM)
- register_nosave_region(PFN_UP(entry->addr), pfn);
+ continue;
- if (pfn >= limit_pfn)
- break;
+ if (last_addr < entry->addr)
+ register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr));
+
+ last_addr = entry->addr + entry->size;
}
+
+ register_nosave_region(PFN_DOWN(last_addr), limit_pfn);
}
#ifdef CONFIG_ACPI
The patch below does not apply to the 6.13-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.13.y
git checkout FETCH_HEAD
git cherry-pick -x f2f29da9f0d4367f6ff35e0d9d021257bb53e273
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025041730-onion-fragrant-b16a@gregkh' --subject-prefix 'PATCH 6.13.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f2f29da9f0d4367f6ff35e0d9d021257bb53e273 Mon Sep 17 00:00:00 2001
From: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Date: Sun, 6 Apr 2025 11:45:22 +0700
Subject: [PATCH] x86/e820: Fix handling of subpage regions when calculating
nosave ranges in e820__register_nosave_regions()
While debugging kexec/hibernation hangs and crashes, it turned out that
the current implementation of e820__register_nosave_regions() suffers from
multiple serious issues:
- The end of the last region is tracked by PFN, causing it to find holes
that aren't there if two consecutive subpage regions are present
- The nosave PFN ranges derived from holes are rounded out (instead of
rounded in) which makes it inconsistent with how explicitly reserved
regions are handled
Fix this by:
- Treating reserved regions as if they were holes, to ensure consistent
handling (rounding out nosave PFN ranges is more correct as the
kernel does not use partial pages)
- Tracking the end of the last RAM region by address instead of pages
to detect holes more precisely
These bugs appear to have been introduced about ~18 years ago with the very
first version of e820_mark_nosave_regions(), and its flawed assumptions were
carried forward uninterrupted through various waves of rewrites and renames.
[ mingo: Added Git archeology details, for kicks and giggles. ]
Fixes: e8eff5ac294e ("[PATCH] Make swsusp avoid memory holes and reserved memory regions on x86_64")
Reported-by: Roberto Ricci <io@r-ricci.it>
Tested-by: Roberto Ricci <io@r-ricci.it>
Signed-off-by: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Len Brown <len.brown@intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20250406-fix-e820-nosave-v3-1-f3787bc1ee1d@qtmlab…
Closes: https://lore.kernel.org/all/Z4WFjBVHpndct7br@desktop0a/
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 57120f0749cc..9d8dd8deb2a7 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
void __init e820__register_nosave_regions(unsigned long limit_pfn)
{
int i;
- unsigned long pfn = 0;
+ u64 last_addr = 0;
for (i = 0; i < e820_table->nr_entries; i++) {
struct e820_entry *entry = &e820_table->entries[i];
- if (pfn < PFN_UP(entry->addr))
- register_nosave_region(pfn, PFN_UP(entry->addr));
-
- pfn = PFN_DOWN(entry->addr + entry->size);
-
if (entry->type != E820_TYPE_RAM)
- register_nosave_region(PFN_UP(entry->addr), pfn);
+ continue;
- if (pfn >= limit_pfn)
- break;
+ if (last_addr < entry->addr)
+ register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr));
+
+ last_addr = entry->addr + entry->size;
}
+
+ register_nosave_region(PFN_DOWN(last_addr), limit_pfn);
}
#ifdef CONFIG_ACPI
The patch below does not apply to the 6.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.14.y
git checkout FETCH_HEAD
git cherry-pick -x f2f29da9f0d4367f6ff35e0d9d021257bb53e273
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025041726-raft-annotate-af34@gregkh' --subject-prefix 'PATCH 6.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f2f29da9f0d4367f6ff35e0d9d021257bb53e273 Mon Sep 17 00:00:00 2001
From: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Date: Sun, 6 Apr 2025 11:45:22 +0700
Subject: [PATCH] x86/e820: Fix handling of subpage regions when calculating
nosave ranges in e820__register_nosave_regions()
While debugging kexec/hibernation hangs and crashes, it turned out that
the current implementation of e820__register_nosave_regions() suffers from
multiple serious issues:
- The end of the last region is tracked by PFN, causing it to find holes
that aren't there if two consecutive subpage regions are present
- The nosave PFN ranges derived from holes are rounded out (instead of
rounded in) which makes it inconsistent with how explicitly reserved
regions are handled
Fix this by:
- Treating reserved regions as if they were holes, to ensure consistent
handling (rounding out nosave PFN ranges is more correct as the
kernel does not use partial pages)
- Tracking the end of the last RAM region by address instead of pages
to detect holes more precisely
These bugs appear to have been introduced about ~18 years ago with the very
first version of e820_mark_nosave_regions(), and its flawed assumptions were
carried forward uninterrupted through various waves of rewrites and renames.
[ mingo: Added Git archeology details, for kicks and giggles. ]
Fixes: e8eff5ac294e ("[PATCH] Make swsusp avoid memory holes and reserved memory regions on x86_64")
Reported-by: Roberto Ricci <io@r-ricci.it>
Tested-by: Roberto Ricci <io@r-ricci.it>
Signed-off-by: Myrrh Periwinkle <myrrhperiwinkle@qtmlabs.xyz>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Len Brown <len.brown@intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20250406-fix-e820-nosave-v3-1-f3787bc1ee1d@qtmlab…
Closes: https://lore.kernel.org/all/Z4WFjBVHpndct7br@desktop0a/
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 57120f0749cc..9d8dd8deb2a7 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
void __init e820__register_nosave_regions(unsigned long limit_pfn)
{
int i;
- unsigned long pfn = 0;
+ u64 last_addr = 0;
for (i = 0; i < e820_table->nr_entries; i++) {
struct e820_entry *entry = &e820_table->entries[i];
- if (pfn < PFN_UP(entry->addr))
- register_nosave_region(pfn, PFN_UP(entry->addr));
-
- pfn = PFN_DOWN(entry->addr + entry->size);
-
if (entry->type != E820_TYPE_RAM)
- register_nosave_region(PFN_UP(entry->addr), pfn);
+ continue;
- if (pfn >= limit_pfn)
- break;
+ if (last_addr < entry->addr)
+ register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr));
+
+ last_addr = entry->addr + entry->size;
}
+
+ register_nosave_region(PFN_DOWN(last_addr), limit_pfn);
}
#ifdef CONFIG_ACPI
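To see the rewritten loop end to end, here is a second illustrative sketch
(userspace C; the toy e820 table, the 4 KiB page size and the printf() stand-in
for register_nosave_region() are assumptions of the example, not kernel code).
Only RAM entries advance last_addr, so holes and reserved regions alike become
rounded-out nosave PFN ranges:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT  12
#define PFN_UP(x)   (((x) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

enum type { RAM, RESERVED };
struct entry { uint64_t addr, size; enum type type; };

/* Stand-in for the kernel's register_nosave_region(). */
static void register_nosave_region(unsigned long spfn, unsigned long epfn)
{
	if (spfn < epfn)
		printf("nosave: PFN %lu - %lu\n", spfn, epfn);
}

int main(void)
{
	/* Toy layout: RAM ending mid-page, a subpage reserved chunk,
	 * a hole, then RAM again. */
	struct entry table[] = {
		{ 0x000000, 0x9fc00,  RAM      },
		{ 0x09fc00, 0x00400,  RESERVED },
		{ 0x100000, 0x300000, RAM      },
	};
	unsigned long limit_pfn = PFN_DOWN(0x800000);
	uint64_t last_addr = 0;

	/* Mirrors the rewritten e820__register_nosave_regions(): only RAM
	 * advances last_addr; everything between RAM entries is registered
	 * as a rounded-out nosave range. */
	for (unsigned int i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		if (table[i].type != RAM)
			continue;
		if (last_addr < table[i].addr)
			register_nosave_region(PFN_DOWN(last_addr),
					       PFN_UP(table[i].addr));
		last_addr = table[i].addr + table[i].size;
	}
	register_nosave_region(PFN_DOWN(last_addr), limit_pfn);
	return 0;
}

With this layout it prints "nosave: PFN 159 - 256" (the reserved chunk plus the
hole up to 0x100000, rounded out to whole pages) and "nosave: PFN 1024 - 2048"
(everything from the end of the last RAM entry up to the limit).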
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 94ba17adaba0f651fdcf745c8891a88e2e028cfa
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025041726-diffuser-spud-36df@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94ba17adaba0f651fdcf745c8891a88e2e028cfa Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 7 Feb 2025 13:20:33 -0800
Subject: [PATCH] mm/damon: avoid applying DAMOS action to same entity multiple
times
'paddr' DAMON operations set can apply a DAMOS scheme's action to a large
folio multiple times in a single DAMOS-regions-walk if the folio is laid on
multiple DAMON regions. Add a field to the DAMOS scheme object that can be
used by the underlying ops to know what was the last entity that the
scheme's action has applied. The core layer unsets the field when each
DAMOS-regions-walk is done for the given scheme. And update 'paddr' ops
to use the infrastructure to avoid the problem.
Link: https://lkml.kernel.org/r/20250207212033.45269-3-sj@kernel.org
Fixes: 57223ac29584 ("mm/damon/paddr: support the pageout scheme")
Signed-off-by: SeongJae Park <sj@kernel.org>
Reported-by: Usama Arif <usamaarif642@gmail.com>
Closes: https://lore.kernel.org/20250203225604.44742-3-usamaarif642@gmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/include/linux/damon.h b/include/linux/damon.h
index c9074d569596..b4d37d9b9221 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -432,6 +432,7 @@ struct damos_access_pattern {
* @wmarks: Watermarks for automated (in)activation of this scheme.
* @target_nid: Destination node if @action is "migrate_{hot,cold}".
* @filters: Additional set of &struct damos_filter for &action.
+ * @last_applied: Last @action applied ops-managing entity.
* @stat: Statistics of this scheme.
* @list: List head for siblings.
*
@@ -454,6 +455,15 @@ struct damos_access_pattern {
* implementation could check pages of the region and skip &action to respect
* &filters
*
+ * The minimum entity that @action can be applied depends on the underlying
+ * &struct damon_operations. Since it may not be aligned with the core layer
+ * abstract, namely &struct damon_region, &struct damon_operations could apply
+ * @action to same entity multiple times. Large folios that underlying on
+ * multiple &struct damon region objects could be such examples. The &struct
+ * damon_operations can use @last_applied to avoid that. DAMOS core logic
+ * unsets @last_applied when each regions walking for applying the scheme is
+ * finished.
+ *
* After applying the &action to each region, &stat_count and &stat_sz is
* updated to reflect the number of regions and total size of regions that the
* &action is applied.
@@ -482,6 +492,7 @@ struct damos {
int target_nid;
};
struct list_head filters;
+ void *last_applied;
struct damos_stat stat;
struct list_head list;
};
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 384935ef4e65..dc8f94fe7c3b 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1856,6 +1856,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
s->next_apply_sis = c->passed_sample_intervals +
(s->apply_interval_us ? s->apply_interval_us :
c->attrs.aggr_interval) / sample_interval;
+ s->last_applied = NULL;
}
}
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 2ac19ebc7076..eb10a723b0a7 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -254,6 +254,17 @@ static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio)
return false;
}
+static bool damon_pa_invalid_damos_folio(struct folio *folio, struct damos *s)
+{
+ if (!folio)
+ return true;
+ if (folio == s->last_applied) {
+ folio_put(folio);
+ return true;
+ }
+ return false;
+}
+
static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
unsigned long *sz_filter_passed)
{
@@ -261,6 +272,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
LIST_HEAD(folio_list);
bool install_young_filter = true;
struct damos_filter *filter;
+ struct folio *folio;
/* check access in page level again by default */
damos_for_each_filter(filter, s) {
@@ -279,9 +291,8 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -307,6 +318,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
damos_destroy_filter(filter);
applied = reclaim_pages(&folio_list);
cond_resched();
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -315,12 +327,12 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate(
unsigned long *sz_filter_passed)
{
unsigned long addr, applied = 0;
+ struct folio *folio;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -339,6 +351,7 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate(
addr += folio_size(folio);
folio_put(folio);
}
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -482,12 +495,12 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
{
unsigned long addr, applied;
LIST_HEAD(folio_list);
+ struct folio *folio;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -506,6 +519,7 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
}
applied = damon_pa_migrate_pages(&folio_list, s->target_nid);
cond_resched();
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -523,15 +537,15 @@ static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s,
{
unsigned long addr;
LIST_HEAD(folio_list);
+ struct folio *folio;
if (!damon_pa_scheme_has_filter(s))
return 0;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -541,6 +555,7 @@ static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s,
addr += folio_size(folio);
folio_put(folio);
}
+ s->last_applied = folio;
return 0;
}
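To make the "apply once per ops-level entity" idea concrete, here is a small
userspace sketch (not kernel code: the addresses, the fake get_folio() lookup
and the omitted reference counting are assumptions of the example) of a 2 MiB
folio straddling two DAMON regions. Without the last_applied check the action
would be applied once per region, i.e. twice; with it the second region walk
skips the folio:

#include <stdio.h>
#include <stdint.h>

struct folio  { uint64_t start, size; };
struct region { uint64_t start, end;  };

/* Pretend lookup: which folio backs a physical address. */
static struct folio big = { 0x200000, 0x200000 };	/* one 2 MiB folio */
static struct folio *get_folio(uint64_t addr)
{
	return (addr >= big.start && addr < big.start + big.size) ? &big : NULL;
}

int main(void)
{
	struct region regions[2] = {
		{ 0x200000, 0x300000 },		/* first half of the folio  */
		{ 0x300000, 0x400000 },		/* second half of the folio */
	};
	struct folio *last_applied = NULL;	/* mirrors s->last_applied */
	int applied = 0;

	for (int i = 0; i < 2; i++) {
		for (uint64_t addr = regions[i].start; addr < regions[i].end; ) {
			struct folio *f = get_folio(addr);

			/* Mirrors damon_pa_invalid_damos_folio(): skip missing
			 * folios and the one the action was just applied to. */
			if (!f || f == last_applied) {
				addr += 4096;
				continue;
			}
			applied++;			/* "apply the DAMOS action" */
			last_applied = f;
			addr += f->size;		/* step past the whole folio */
		}
	}
	printf("action applied %d time(s)\n", applied);	/* prints 1, not 2 */
	return 0;
}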
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 94ba17adaba0f651fdcf745c8891a88e2e028cfa
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025041725-overbite-undermost-15f3@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94ba17adaba0f651fdcf745c8891a88e2e028cfa Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 7 Feb 2025 13:20:33 -0800
Subject: [PATCH] mm/damon: avoid applying DAMOS action to same entity multiple
times
'paddr' DAMON operations set can apply a DAMOS scheme's action to a large
folio multiple times in a single DAMOS-regions-walk if the folio is laid on
multiple DAMON regions. Add a field to the DAMOS scheme object that can be
used by the underlying ops to know what was the last entity that the
scheme's action has applied. The core layer unsets the field when each
DAMOS-regions-walk is done for the given scheme. And update 'paddr' ops
to use the infrastructure to avoid the problem.
Link: https://lkml.kernel.org/r/20250207212033.45269-3-sj@kernel.org
Fixes: 57223ac29584 ("mm/damon/paddr: support the pageout scheme")
Signed-off-by: SeongJae Park <sj@kernel.org>
Reported-by: Usama Arif <usamaarif642@gmail.com>
Closes: https://lore.kernel.org/20250203225604.44742-3-usamaarif642@gmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/include/linux/damon.h b/include/linux/damon.h
index c9074d569596..b4d37d9b9221 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -432,6 +432,7 @@ struct damos_access_pattern {
* @wmarks: Watermarks for automated (in)activation of this scheme.
* @target_nid: Destination node if @action is "migrate_{hot,cold}".
* @filters: Additional set of &struct damos_filter for &action.
+ * @last_applied: Last @action applied ops-managing entity.
* @stat: Statistics of this scheme.
* @list: List head for siblings.
*
@@ -454,6 +455,15 @@ struct damos_access_pattern {
* implementation could check pages of the region and skip &action to respect
* &filters
*
+ * The minimum entity that @action can be applied depends on the underlying
+ * &struct damon_operations. Since it may not be aligned with the core layer
+ * abstract, namely &struct damon_region, &struct damon_operations could apply
+ * @action to same entity multiple times. Large folios that underlying on
+ * multiple &struct damon region objects could be such examples. The &struct
+ * damon_operations can use @last_applied to avoid that. DAMOS core logic
+ * unsets @last_applied when each regions walking for applying the scheme is
+ * finished.
+ *
* After applying the &action to each region, &stat_count and &stat_sz is
* updated to reflect the number of regions and total size of regions that the
* &action is applied.
@@ -482,6 +492,7 @@ struct damos {
int target_nid;
};
struct list_head filters;
+ void *last_applied;
struct damos_stat stat;
struct list_head list;
};
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 384935ef4e65..dc8f94fe7c3b 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1856,6 +1856,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
s->next_apply_sis = c->passed_sample_intervals +
(s->apply_interval_us ? s->apply_interval_us :
c->attrs.aggr_interval) / sample_interval;
+ s->last_applied = NULL;
}
}
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 2ac19ebc7076..eb10a723b0a7 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -254,6 +254,17 @@ static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio)
return false;
}
+static bool damon_pa_invalid_damos_folio(struct folio *folio, struct damos *s)
+{
+ if (!folio)
+ return true;
+ if (folio == s->last_applied) {
+ folio_put(folio);
+ return true;
+ }
+ return false;
+}
+
static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
unsigned long *sz_filter_passed)
{
@@ -261,6 +272,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
LIST_HEAD(folio_list);
bool install_young_filter = true;
struct damos_filter *filter;
+ struct folio *folio;
/* check access in page level again by default */
damos_for_each_filter(filter, s) {
@@ -279,9 +291,8 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -307,6 +318,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
damos_destroy_filter(filter);
applied = reclaim_pages(&folio_list);
cond_resched();
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -315,12 +327,12 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate(
unsigned long *sz_filter_passed)
{
unsigned long addr, applied = 0;
+ struct folio *folio;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -339,6 +351,7 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate(
addr += folio_size(folio);
folio_put(folio);
}
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -482,12 +495,12 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
{
unsigned long addr, applied;
LIST_HEAD(folio_list);
+ struct folio *folio;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -506,6 +519,7 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
}
applied = damon_pa_migrate_pages(&folio_list, s->target_nid);
cond_resched();
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -523,15 +537,15 @@ static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s,
{
unsigned long addr;
LIST_HEAD(folio_list);
+ struct folio *folio;
if (!damon_pa_scheme_has_filter(s))
return 0;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -541,6 +555,7 @@ static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s,
addr += folio_size(folio);
folio_put(folio);
}
+ s->last_applied = folio;
return 0;
}
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 94ba17adaba0f651fdcf745c8891a88e2e028cfa
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025041725-premises-moonshine-36f4@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94ba17adaba0f651fdcf745c8891a88e2e028cfa Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 7 Feb 2025 13:20:33 -0800
Subject: [PATCH] mm/damon: avoid applying DAMOS action to same entity multiple
times
'paddr' DAMON operations set can apply a DAMOS scheme's action to a large
folio multiple times in a single DAMOS-regions-walk if the folio is laid on
multiple DAMON regions. Add a field to the DAMOS scheme object that can be
used by the underlying ops to know what was the last entity that the
scheme's action has applied. The core layer unsets the field when each
DAMOS-regions-walk is done for the given scheme. And update 'paddr' ops
to use the infrastructure to avoid the problem.
Link: https://lkml.kernel.org/r/20250207212033.45269-3-sj@kernel.org
Fixes: 57223ac29584 ("mm/damon/paddr: support the pageout scheme")
Signed-off-by: SeongJae Park <sj@kernel.org>
Reported-by: Usama Arif <usamaarif642@gmail.com>
Closes: https://lore.kernel.org/20250203225604.44742-3-usamaarif642@gmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/include/linux/damon.h b/include/linux/damon.h
index c9074d569596..b4d37d9b9221 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -432,6 +432,7 @@ struct damos_access_pattern {
* @wmarks: Watermarks for automated (in)activation of this scheme.
* @target_nid: Destination node if @action is "migrate_{hot,cold}".
* @filters: Additional set of &struct damos_filter for &action.
+ * @last_applied: Last @action applied ops-managing entity.
* @stat: Statistics of this scheme.
* @list: List head for siblings.
*
@@ -454,6 +455,15 @@ struct damos_access_pattern {
* implementation could check pages of the region and skip &action to respect
* &filters
*
+ * The minimum entity that @action can be applied depends on the underlying
+ * &struct damon_operations. Since it may not be aligned with the core layer
+ * abstract, namely &struct damon_region, &struct damon_operations could apply
+ * @action to same entity multiple times. Large folios that underlying on
+ * multiple &struct damon region objects could be such examples. The &struct
+ * damon_operations can use @last_applied to avoid that. DAMOS core logic
+ * unsets @last_applied when each regions walking for applying the scheme is
+ * finished.
+ *
* After applying the &action to each region, &stat_count and &stat_sz is
* updated to reflect the number of regions and total size of regions that the
* &action is applied.
@@ -482,6 +492,7 @@ struct damos {
int target_nid;
};
struct list_head filters;
+ void *last_applied;
struct damos_stat stat;
struct list_head list;
};
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 384935ef4e65..dc8f94fe7c3b 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1856,6 +1856,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
s->next_apply_sis = c->passed_sample_intervals +
(s->apply_interval_us ? s->apply_interval_us :
c->attrs.aggr_interval) / sample_interval;
+ s->last_applied = NULL;
}
}
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 2ac19ebc7076..eb10a723b0a7 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -254,6 +254,17 @@ static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio)
return false;
}
+static bool damon_pa_invalid_damos_folio(struct folio *folio, struct damos *s)
+{
+ if (!folio)
+ return true;
+ if (folio == s->last_applied) {
+ folio_put(folio);
+ return true;
+ }
+ return false;
+}
+
static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
unsigned long *sz_filter_passed)
{
@@ -261,6 +272,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
LIST_HEAD(folio_list);
bool install_young_filter = true;
struct damos_filter *filter;
+ struct folio *folio;
/* check access in page level again by default */
damos_for_each_filter(filter, s) {
@@ -279,9 +291,8 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -307,6 +318,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
damos_destroy_filter(filter);
applied = reclaim_pages(&folio_list);
cond_resched();
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -315,12 +327,12 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate(
unsigned long *sz_filter_passed)
{
unsigned long addr, applied = 0;
+ struct folio *folio;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -339,6 +351,7 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate(
addr += folio_size(folio);
folio_put(folio);
}
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -482,12 +495,12 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
{
unsigned long addr, applied;
LIST_HEAD(folio_list);
+ struct folio *folio;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -506,6 +519,7 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
}
applied = damon_pa_migrate_pages(&folio_list, s->target_nid);
cond_resched();
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -523,15 +537,15 @@ static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s,
{
unsigned long addr;
LIST_HEAD(folio_list);
+ struct folio *folio;
if (!damon_pa_scheme_has_filter(s))
return 0;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -541,6 +555,7 @@ static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s,
addr += folio_size(folio);
folio_put(folio);
}
+ s->last_applied = folio;
return 0;
}
The patch below does not apply to the 6.13-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.13.y
git checkout FETCH_HEAD
git cherry-pick -x 94ba17adaba0f651fdcf745c8891a88e2e028cfa
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2025041724-sporty-dismantle-42e9@gregkh' --subject-prefix 'PATCH 6.13.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94ba17adaba0f651fdcf745c8891a88e2e028cfa Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 7 Feb 2025 13:20:33 -0800
Subject: [PATCH] mm/damon: avoid applying DAMOS action to same entity multiple
times
'paddr' DAMON operations set can apply a DAMOS scheme's action to a large
folio multiple times in a single DAMOS-regions-walk if the folio is laid on
multiple DAMON regions. Add a field to the DAMOS scheme object that can be
used by the underlying ops to know what was the last entity that the
scheme's action has applied. The core layer unsets the field when each
DAMOS-regions-walk is done for the given scheme. And update 'paddr' ops
to use the infrastructure to avoid the problem.
Link: https://lkml.kernel.org/r/20250207212033.45269-3-sj@kernel.org
Fixes: 57223ac29584 ("mm/damon/paddr: support the pageout scheme")
Signed-off-by: SeongJae Park <sj@kernel.org>
Reported-by: Usama Arif <usamaarif642@gmail.com>
Closes: https://lore.kernel.org/20250203225604.44742-3-usamaarif642@gmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/include/linux/damon.h b/include/linux/damon.h
index c9074d569596..b4d37d9b9221 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -432,6 +432,7 @@ struct damos_access_pattern {
* @wmarks: Watermarks for automated (in)activation of this scheme.
* @target_nid: Destination node if @action is "migrate_{hot,cold}".
* @filters: Additional set of &struct damos_filter for &action.
+ * @last_applied: Last @action applied ops-managing entity.
* @stat: Statistics of this scheme.
* @list: List head for siblings.
*
@@ -454,6 +455,15 @@ struct damos_access_pattern {
* implementation could check pages of the region and skip &action to respect
* &filters
*
+ * The minimum entity that @action can be applied depends on the underlying
+ * &struct damon_operations. Since it may not be aligned with the core layer
+ * abstract, namely &struct damon_region, &struct damon_operations could apply
+ * @action to same entity multiple times. Large folios that underlying on
+ * multiple &struct damon region objects could be such examples. The &struct
+ * damon_operations can use @last_applied to avoid that. DAMOS core logic
+ * unsets @last_applied when each regions walking for applying the scheme is
+ * finished.
+ *
* After applying the &action to each region, &stat_count and &stat_sz is
* updated to reflect the number of regions and total size of regions that the
* &action is applied.
@@ -482,6 +492,7 @@ struct damos {
int target_nid;
};
struct list_head filters;
+ void *last_applied;
struct damos_stat stat;
struct list_head list;
};
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 384935ef4e65..dc8f94fe7c3b 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1856,6 +1856,7 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
s->next_apply_sis = c->passed_sample_intervals +
(s->apply_interval_us ? s->apply_interval_us :
c->attrs.aggr_interval) / sample_interval;
+ s->last_applied = NULL;
}
}
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 2ac19ebc7076..eb10a723b0a7 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -254,6 +254,17 @@ static bool damos_pa_filter_out(struct damos *scheme, struct folio *folio)
return false;
}
+static bool damon_pa_invalid_damos_folio(struct folio *folio, struct damos *s)
+{
+ if (!folio)
+ return true;
+ if (folio == s->last_applied) {
+ folio_put(folio);
+ return true;
+ }
+ return false;
+}
+
static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
unsigned long *sz_filter_passed)
{
@@ -261,6 +272,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
LIST_HEAD(folio_list);
bool install_young_filter = true;
struct damos_filter *filter;
+ struct folio *folio;
/* check access in page level again by default */
damos_for_each_filter(filter, s) {
@@ -279,9 +291,8 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -307,6 +318,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s,
damos_destroy_filter(filter);
applied = reclaim_pages(&folio_list);
cond_resched();
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -315,12 +327,12 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate(
unsigned long *sz_filter_passed)
{
unsigned long addr, applied = 0;
+ struct folio *folio;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -339,6 +351,7 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate(
addr += folio_size(folio);
folio_put(folio);
}
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -482,12 +495,12 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
{
unsigned long addr, applied;
LIST_HEAD(folio_list);
+ struct folio *folio;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -506,6 +519,7 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
}
applied = damon_pa_migrate_pages(&folio_list, s->target_nid);
cond_resched();
+ s->last_applied = folio;
return applied * PAGE_SIZE;
}
@@ -523,15 +537,15 @@ static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s,
{
unsigned long addr;
LIST_HEAD(folio_list);
+ struct folio *folio;
if (!damon_pa_scheme_has_filter(s))
return 0;
addr = r->ar.start;
while (addr < r->ar.end) {
- struct folio *folio = damon_get_folio(PHYS_PFN(addr));
-
- if (!folio) {
+ folio = damon_get_folio(PHYS_PFN(addr));
+ if (damon_pa_invalid_damos_folio(folio, s)) {
addr += PAGE_SIZE;
continue;
}
@@ -541,6 +555,7 @@ static unsigned long damon_pa_stat(struct damon_region *r, struct damos *s,
addr += folio_size(folio);
folio_put(folio);
}
+ s->last_applied = folio;
return 0;
}