read_hv_sched_clock_tsc() assumes that the Hyper-V clock counter is
bigger than the variable hv_sched_clock_offset, which is cached during
early boot, but depending on the timing this assumption may be false
when a hibernated VM starts again (the clock counter starts from 0
again) and is resuming back (Note: hv_init_tsc_clocksource() is not
called during hibernation/resume); consequently,
read_hv_sched_clock_tsc() may return a negative integer (which is
interpreted as a huge positive integer since the return type is u64)
and new kernel messages are prefixed with huge timestamps before
read_hv_sched_clock_tsc() grows big enough (which typically takes
several seconds).
Fix the issue by saving the Hyper-V clock counter just before the
suspend, and using it to correct the hv_sched_clock_offset in
resume. This makes hv tsc page based sched_clock continuous and ensures
that post resume, it starts from where it left off during suspend.
Override x86_platform.save_sched_clock_state and
x86_platform.restore_sched_clock_state routines to correct this as soon
as possible.
Note: if Invariant TSC is available, the issue doesn't happen because
1) we don't register read_hv_sched_clock_tsc() for sched clock:
See commit e5313f1c5404 ("clocksource/drivers/hyper-v: Rework
clocksource and sched clock setup");
2) the common x86 code adjusts TSC similarly: see
__restore_processor_state() -> tsc_verify_tsc_adjust(true) and
x86_platform.restore_sched_clock_state().
Cc: stable(a)vger.kernel.org
Fixes: 1349401ff1aa ("clocksource/drivers/hyper-v: Suspend/resume Hyper-V clocksource for hibernation")
Co-developed-by: Dexuan Cui <decui(a)microsoft.com>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Signed-off-by: Naman Jain <namjain(a)linux.microsoft.com>
---
Changes from v2:
https://lore.kernel.org/all/20240911045632.3757-1-namjain@linux.microsoft.c…
Addressed Michael's comments:
* Changed commit msg to include information on making timestamps
continuous
* Changed subject to reflect the new file being changed
* Changed variable name for saving offset/counters
* Moved comment on new function introduced from header file to function
definition.
* Removed the equations in comments
* Rebased to latest linux-next tip
Changes from v1:
https://lore.kernel.org/all/20240909053923.8512-1-namjain@linux.microsoft.c…
* Reorganized code as per Michael's comment, and moved the logic to x86
specific files, to keep hyperv_timer.c arch independent.
---
arch/x86/kernel/cpu/mshyperv.c | 58 ++++++++++++++++++++++++++++++
drivers/clocksource/hyperv_timer.c | 14 +++++++-
include/clocksource/hyperv_timer.h | 2 ++
3 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index ead967479fa6..e8e25d6e64cd 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -224,6 +224,63 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs)
hyperv_cleanup();
}
#endif /* CONFIG_CRASH_DUMP */
+
+static u64 hv_ref_counter_at_suspend;
+static void (*old_save_sched_clock_state)(void);
+static void (*old_restore_sched_clock_state)(void);
+
+/*
+ * Hyper-V clock counter resets during hibernation. Save and restore clock
+ * offset during suspend/resume, while also considering the time passed
+ * before suspend. This is to make sure that sched_clock using hv tsc page
+ * based clocksource, proceeds from where it left off during suspend and
+ * it shows correct time for the timestamps of kernel messages after resume.
+ */
+static void save_hv_clock_tsc_state(void)
+{
+ hv_ref_counter_at_suspend = hv_read_reference_counter();
+}
+
+static void restore_hv_clock_tsc_state(void)
+{
+ /*
+ * Adjust the offsets used by hv tsc clocksource to
+ * account for the time spent before hibernation.
+ * adjusted value = reference counter (time) at suspend
+ * - reference counter (time) now.
+ */
+ hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter());
+}
+
+/*
+ * Functions to override save_sched_clock_state and restore_sched_clock_state
+ * functions of x86_platform. The Hyper-V clock counter is reset during
+ * suspend-resume and the offset used to measure time needs to be
+ * corrected, post resume.
+ */
+static void hv_save_sched_clock_state(void)
+{
+ old_save_sched_clock_state();
+ save_hv_clock_tsc_state();
+}
+
+static void hv_restore_sched_clock_state(void)
+{
+ restore_hv_clock_tsc_state();
+ old_restore_sched_clock_state();
+}
+
+static void __init x86_setup_ops_for_tsc_pg_clock(void)
+{
+ if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
+ return;
+
+ old_save_sched_clock_state = x86_platform.save_sched_clock_state;
+ x86_platform.save_sched_clock_state = hv_save_sched_clock_state;
+
+ old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
+ x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
+}
#endif /* CONFIG_HYPERV */
static uint32_t __init ms_hyperv_platform(void)
@@ -590,6 +647,7 @@ static void __init ms_hyperv_init_platform(void)
/* Register Hyper-V specific clocksource */
hv_init_clocksource();
+ x86_setup_ops_for_tsc_pg_clock();
hv_vtl_init_platform();
#endif
/*
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 99177835cade..b39dee7b93af 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -27,7 +27,8 @@
#include <asm/mshyperv.h>
static struct clock_event_device __percpu *hv_clock_event;
-static u64 hv_sched_clock_offset __ro_after_init;
+/* Note: offset can hold negative values after hibernation. */
+static u64 hv_sched_clock_offset __read_mostly;
/*
* If false, we're using the old mechanism for stimer0 interrupts
@@ -470,6 +471,17 @@ static void resume_hv_clock_tsc(struct clocksource *arg)
hv_set_msr(HV_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
}
+/*
+ * Called during resume from hibernation, from overridden
+ * x86_platform.restore_sched_clock_state routine. This is to adjust offsets
+ * used to calculate time for hv tsc page based sched_clock, to account for
+ * time spent before hibernation.
+ */
+void hv_adj_sched_clock_offset(u64 offset)
+{
+ hv_sched_clock_offset -= offset;
+}
+
#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
static int hv_cs_enable(struct clocksource *cs)
{
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
index 6cdc873ac907..aa5233b1eba9 100644
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -38,6 +38,8 @@ extern void hv_remap_tsc_clocksource(void);
extern unsigned long hv_get_tsc_pfn(void);
extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
+extern void hv_adj_sched_clock_offset(u64 offset);
+
static __always_inline bool
hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
u64 *cur_tsc, u64 *time)
base-commit: a430d95c5efa2b545d26a094eb5f624e36732af0
--
2.34.1
Since 5.16 and prior to 6.13 KVM can't be used with FSDAX
guest memory (PMD pages). To reproduce the issue you need to reserve
guest memory with `memmap=` cmdline, create and mount FS in DAX mode
(tested both XFS and ext4), see doc link below. ndctl command for test:
ndctl create-namespace -v -e namespace1.0 --map=dev --mode=fsdax -a 2M
Then pass memory object to qemu like:
-m 8G -object memory-backend-file,id=ram0,size=8G,\
mem-path=/mnt/pmem/guestmem,share=on,prealloc=on,dump=off,align=2097152 \
-numa node,memdev=ram0,cpus=0-1
QEMU fails to run guest with error: kvm run failed Bad address
and there are two warnings in dmesg:
WARN_ON_ONCE(!page_count(page)) in kvm_is_zone_device_page() and
WARN_ON_ONCE(folio_ref_count(folio) <= 0) in try_grab_folio() (v6.6.63)
It looks like in the past assumption was made that pfn won't change from
faultin_pfn() to release_pfn_clean(), e.g. see
commit 4cd071d13c5c ("KVM: x86/mmu: Move calls to thp_adjust() down a level")
But kvm_page_fault structure made pfn part of mutable state, so
now release_pfn_clean() can take hugepage-adjusted pfn.
And it works for all cases (/dev/shm, hugetlb, devdax) except fsdax.
Apparently in fsdax mode faultin-pfn and adjusted-pfn may refer to
different folios, so we're getting get_page/put_page imbalance.
To solve this preserve faultin pfn in separate kvm_page_fault
field and pass it in kvm_release_pfn_clean(). Patch tested for all
mentioned guest memory backends with tdp_mmu={0,1}.
No bug in upstream as it was solved fundamentally by
commit 8dd861cc07e2 ("KVM: x86/mmu: Put refcounted pages instead of blindly releasing pfns")
and related patch series.
Link: https://nvdimm.docs.kernel.org/2mib_fs_dax.html
Fixes: 2f6305dd5676 ("KVM: MMU: change kvm_tdp_mmu_map() arguments to kvm_page_fault")
Signed-off-by: Nikolay Kuratov <kniv(a)yandex-team.ru>
---
arch/x86/kvm/mmu/mmu.c | 5 +++--
arch/x86/kvm/mmu/mmu_internal.h | 2 ++
arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
3 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 294775b7383b..2105f3bc2e59 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4321,6 +4321,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
smp_rmb();
ret = __kvm_faultin_pfn(vcpu, fault);
+ fault->faultin_pfn = fault->pfn;
if (ret != RET_PF_CONTINUE)
return ret;
@@ -4398,7 +4399,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
out_unlock:
write_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(fault->pfn);
+ kvm_release_pfn_clean(fault->faultin_pfn);
return r;
}
@@ -4474,7 +4475,7 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
out_unlock:
read_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(fault->pfn);
+ kvm_release_pfn_clean(fault->faultin_pfn);
return r;
}
#endif
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index decc1f153669..a016b51f9c62 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -236,6 +236,8 @@ struct kvm_page_fault {
/* Outputs of kvm_faultin_pfn. */
unsigned long mmu_seq;
kvm_pfn_t pfn;
+ /* pfn copy for kvm_release_pfn_clean(), constant after kvm_faultin_pfn() */
+ kvm_pfn_t faultin_pfn;
hva_t hva;
bool map_writable;
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index c85255073f67..b945dde6e3be 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -848,7 +848,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
out_unlock:
write_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(fault->pfn);
+ kvm_release_pfn_clean(fault->faultin_pfn);
return r;
}
--
2.34.1
Changes in v2:
- Fixup for uninitilized md5sig_count stack variable
(Oops! Kudos to kernel test robot <lkp(a)intel.com>)
- Correct space damage, add a missing Fixes tag &
reformat tcp_ulp_ops_size() (Kuniyuki Iwashima)
- Take out patch for maximum attribute length, see (4) below.
Going to send it later with the next TCP-AO-diag part
(Kuniyuki Iwashima)
- Link to v1: https://lore.kernel.org/r/20241106-tcp-md5-diag-prep-v1-0-d62debf3dded@gmai…
My original intent was to replace the last non-upstream Arista's TCP-AO
piece. That is per-netns procfs seqfile which lists AO keys. In my view
an acceptable upstream alternative would be TCP-AO-diag uAPI.
So, I started by looking and reviewing TCP-MD5-diag code. And straight
away I saw a bunch of issues:
1. Similarly to TCP_MD5SIG_EXT, which doesn't check tcpm_flags for
unknown flags and so being non-extendable setsockopt(), the same
way tcp_diag_put_md5sig() dumps md5 keys in an array of
tcp_diag_md5sig, which makes it ABI non-extendable structure
as userspace can't tolerate any new members in it.
2. Inet-diag allocates netlink message for sockets in
inet_diag_dump_one_icsk(), which uses a TCP-diag callback
.idiag_get_aux_size(), that pre-calculates the needed space for
TCP-diag related information. But as neither socket lock nor
rcu_readlock() are held between allocation and the actual TCP
info filling, the TCP-related space requirement may change before
reaching tcp_diag_put_md5sig(). I.e., the number of TCP-MD5 keys on
a socket. Thankfully, TCP-MD5-diag won't overwrite the skb, but will
return EMSGSIZE, triggering WARN_ON() in inet_diag_dump_one_icsk().
3. Inet-diag "do" request* can create skb of any message required size.
But "dump" request* the skb size, since d35c99ff77ec ("netlink: do
not enter direct reclaim from netlink_dump()") is limited by
32 KB. Having in mind that sizeof(struct tcp_diag_md5sig) = 100 bytes,
dumps for sockets that have more than 327 keys are going to fail
(not counting other diag infos, which lower this limit futher).
That is much lower than the number of TCP-MD5 keys that can be
allocated on a socket with the current default
optmem_max limit (128Kb).
So, then I went and written selftests for TCP-MD5-diag and besides
confirming that (2) and (3) are not theoretical issues, I also
discovered another issues, that I didn't notice on code inspection:
4. nlattr::nla_len is __u16, which limits the largest netlink attibute
by 64Kb or by 655 tcp_diag_md5sig keys in the diag array. What
happens de-facto is that the netlink attribute gets u16 overflow,
breaking the userspace parsing - RTA_NEXT(), that should point
to the next attribute, points into the middle of md5 keys array.
In this patch set issues (2) and (4) are addressed.
(2) by not returning EMSGSIZE when the dump raced with modifying
TCP-MD5 keys on a socket, but mark the dump inconsistent by setting
NLM_F_DUMP_INTR nlmsg flag. Which changes uAPI in situations where
previously kernel did WARN() and errored the dump.
(4) by artificially limiting the maximum attribute size by U16_MAX - 1.
In order to remove the new limit from (4) solution, my plan is to
convert the dump of TCP-MD5 keys from an array to
NL_ATTR_TYPE_NESTED_ARRAY (or alike), which should also address (1).
And for (3), it's needed to teach tcp-diag how-to remember not only
socket on which previous recvmsg() stopped, but potentially TCP-MD5
key as well.
I plan in the next part of patch set address (3), (1) and the new limit
for (4), together with adding new TCP-AO-diag.
* Terminology from Documentation/userspace-api/netlink/intro.rst
Signed-off-by: Dmitry Safonov <0x7f454c46(a)gmail.com>
---
Dmitry Safonov (5):
net/diag: Do not race on dumping MD5 keys with adding new MD5 keys
net/diag: Warn only once on EMSGSIZE
net/diag: Pre-allocate optional info only if requested
net/diag: Always pre-allocate tcp_ulp info
net/netlink: Correct the comment on netlink message max cap
include/linux/inet_diag.h | 3 +-
include/net/tcp.h | 1 -
net/ipv4/inet_diag.c | 89 ++++++++++++++++++++++++++++++++++++++---------
net/ipv4/tcp_diag.c | 68 ++++++++++++++++++------------------
net/mptcp/diag.c | 20 -----------
net/netlink/af_netlink.c | 4 +--
net/tls/tls_main.c | 17 ---------
7 files changed, 110 insertions(+), 92 deletions(-)
---
base-commit: f1b785f4c7870c42330b35522c2514e39a1e28e7
change-id: 20241106-tcp-md5-diag-prep-2f0dcf371d90
Best regards,
--
Dmitry Safonov <0x7f454c46(a)gmail.com>
From: Ard Biesheuvel <ardb(a)kernel.org>
GCC and Clang both implement stack protector support based on Thread
Local Storage (TLS) variables, and this is used in the kernel to
implement per-task stack cookies, by copying a task's stack cookie into
a per-CPU variable every time it is scheduled in.
Both now also implement -mstack-protector-guard-symbol=, which permits
the TLS variable to be specified directly. This is useful because it
will allow us to move away from using a fixed offset of 40 bytes into
the per-CPU area on x86_64, which requires a lot of special handling in
the per-CPU code and the runtime relocation code.
However, while GCC is rather lax in its implementation of this command
line option, Clang actually requires that the provided symbol name
refers to a TLS variable (i.e., one declared with __thread), although it
also permits the variable to be undeclared entirely, in which case it
will use an implicit declaration of the right type.
The upshot of this is that Clang will emit the correct references to the
stack cookie variable in most cases, e.g.,
10d: 64 a1 00 00 00 00 mov %fs:0x0,%eax
10f: R_386_32 __stack_chk_guard
However, if a non-TLS definition of the symbol in question is visible in
the same compilation unit (which amounts to the whole of vmlinux if LTO
is enabled), it will drop the per-CPU prefix and emit a load from a
bogus address.
Work around this by using a symbol name that never occurs in C code, and
emit it as an alias in the linker script.
Fixes: 3fb0fdb3bbe7 ("x86/stackprotector/32: Make the canary into a regular percpu variable")
Cc: <stable(a)vger.kernel.org>
Cc: Fangrui Song <i(a)maskray.me>
Cc: Uros Bizjak <ubizjak(a)gmail.com>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Andy Lutomirski <luto(a)kernel.org>
Link: https://github.com/ClangBuiltLinux/linux/issues/1854
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
Signed-off-by: Brian Gerst <brgerst(a)gmail.com>
---
arch/x86/Makefile | 5 +++--
arch/x86/entry/entry.S | 16 ++++++++++++++++
arch/x86/include/asm/asm-prototypes.h | 3 +++
arch/x86/kernel/cpu/common.c | 2 ++
arch/x86/kernel/vmlinux.lds.S | 3 +++
5 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index cd75e78a06c1..5b773b34768d 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -142,9 +142,10 @@ ifeq ($(CONFIG_X86_32),y)
ifeq ($(CONFIG_STACKPROTECTOR),y)
ifeq ($(CONFIG_SMP),y)
- KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
+ KBUILD_CFLAGS += -mstack-protector-guard-reg=fs \
+ -mstack-protector-guard-symbol=__ref_stack_chk_guard
else
- KBUILD_CFLAGS += -mstack-protector-guard=global
+ KBUILD_CFLAGS += -mstack-protector-guard=global
endif
endif
else
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 324686bca368..b7ea3e8e9ecc 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -51,3 +51,19 @@ EXPORT_SYMBOL_GPL(mds_verw_sel);
.popsection
THUNK warn_thunk_thunk, __warn_thunk
+
+#ifndef CONFIG_X86_64
+/*
+ * Clang's implementation of TLS stack cookies requires the variable in
+ * question to be a TLS variable. If the variable happens to be defined as an
+ * ordinary variable with external linkage in the same compilation unit (which
+ * amounts to the whole of vmlinux with LTO enabled), Clang will drop the
+ * segment register prefix from the references, resulting in broken code. Work
+ * around this by avoiding the symbol used in -mstack-protector-guard-symbol=
+ * entirely in the C code, and use an alias emitted by the linker script
+ * instead.
+ */
+#ifdef CONFIG_STACKPROTECTOR
+EXPORT_SYMBOL(__ref_stack_chk_guard);
+#endif
+#endif
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 25466c4d2134..3674006e3974 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -20,3 +20,6 @@
extern void cmpxchg8b_emu(void);
#endif
+#if defined(__GENKSYMS__) && defined(CONFIG_STACKPROTECTOR)
+extern unsigned long __ref_stack_chk_guard;
+#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8f41ab219cf1..9d42bd15e06c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2091,8 +2091,10 @@ void syscall_init(void)
#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
+#ifndef CONFIG_SMP
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif
+#endif
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 410546bacc0f..d61c3584f3e6 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -468,6 +468,9 @@ SECTIONS
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
+/* needed for Clang - see arch/x86/entry/entry.S */
+PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
+
#ifdef CONFIG_X86_64
/*
* Per-cpu symbols which need to be offset from __per_cpu_load
--
2.47.0
From: Luca Stefani <luca.stefani.ge1(a)gmail.com>
There are reports that system cannot suspend due to running trim because
the task responsible for trimming the device isn't able to finish in
time, especially since we have a free extent discarding phase, which can
trim a lot of unallocated space. There are no limits on the trim size
(unlike the block group part).
Since trime isn't a critical call it can be interrupted at any time,
in such cases we stop the trim, report the amount of discarded bytes and
return an error.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=219180
Link: https://bugzilla.suse.com/show_bug.cgi?id=1229737
CC: stable(a)vger.kernel.org # 5.15+
Signed-off-by: Luca Stefani <luca.stefani.ge1(a)gmail.com>
Reviewed-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/extent-tree.c | 7 ++++++-
fs/btrfs/free-space-cache.c | 4 ++--
fs/btrfs/free-space-cache.h | 7 +++++++
3 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b3680e1c7054..599407120513 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1319,6 +1319,11 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
start += bytes_to_discard;
bytes_left -= bytes_to_discard;
*discarded_bytes += bytes_to_discard;
+
+ if (btrfs_trim_interrupted()) {
+ ret = -ERESTARTSYS;
+ break;
+ }
}
return ret;
@@ -6094,7 +6099,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
start += len;
*trimmed += bytes;
- if (fatal_signal_pending(current)) {
+ if (btrfs_trim_interrupted()) {
ret = -ERESTARTSYS;
break;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3bcf4a30cad7..9a6ec9344c3e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3808,7 +3808,7 @@ static int trim_no_bitmap(struct btrfs_block_group *block_group,
if (async && *total_trimmed)
break;
- if (fatal_signal_pending(current)) {
+ if (btrfs_trim_interrupted()) {
ret = -ERESTARTSYS;
break;
}
@@ -3999,7 +3999,7 @@ static int trim_bitmaps(struct btrfs_block_group *block_group,
}
block_group->discard_cursor = start;
- if (fatal_signal_pending(current)) {
+ if (btrfs_trim_interrupted()) {
if (start != offset)
reset_trimming_bitmap(ctl, offset);
ret = -ERESTARTSYS;
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 33b4da3271b1..bd80c7b2af96 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -6,6 +6,8 @@
#ifndef BTRFS_FREE_SPACE_CACHE_H
#define BTRFS_FREE_SPACE_CACHE_H
+#include <linux/freezer.h>
+
/*
* This is the trim state of an extent or bitmap.
*
@@ -43,6 +45,11 @@ static inline bool btrfs_free_space_trimming_bitmap(
return (info->trim_state == BTRFS_TRIM_STATE_TRIMMING);
}
+static inline bool btrfs_trim_interrupted(void)
+{
+ return fatal_signal_pending(current) || freezing(current);
+}
+
/*
* Deltas are an effective way to populate global statistics. Give macro names
* to make it clear what we're doing. An example is discard_extents in
--
2.45.0
Hi stable tree maintainers,
Please revert the backports of
44c76825d6ee ("x86: Increase brk randomness entropy for 64-bit systems")
namely:
5.4: 03475167fda50b8511ef620a27409b08365882e1
5.10: 25d31baf922c1ee987efd6fcc9c7d4ab539c66b4
5.15: 06cb3463aa58906cfff72877eb7f50cb26e9ca93
6.1: b0cde867b80a5e81fcbc0383e138f5845f2005ee
6.6: 1a45994fb218d93dec48a3a86f68283db61e0936
There seems to be a bad interaction between this change and older
PIE-built qemu-user-static (for aarch64) binaries[1]. Investigation
continues to see if this will need to be reverted from 6.6, 6.11,
and mainline. But for now, it's clearly a problem for older kernels with
older qemu.
Thanks!
-Kees
[1] https://lore.kernel.org/all/202411201000.F3313C02@keescook/
--
Kees Cook
Leonard Lausen reported an issue with suspend/resume of the sc7180
devices. Fix the WB atomic check, which caused the issue. Also make sure
that DPU debugging logs are always directed to the drm_debug / DRIVER so
that usual drm.debug masks work in an expected way.
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
---
Changes in v2:
- Reworked the writeback to just drop the connector->status check.
- Expanded commit message for the debugging patch.
- Link to v1: https://lore.kernel.org/r/20240709-dpu-fix-wb-v1-0-448348bfd4cb@linaro.org
---
Dmitry Baryshkov (2):
drm/msm/dpu1: don't choke on disabling the writeback connector
drm/msm/dpu: don't play tricks with debug macros
drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 14 ++------------
drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c | 3 ---
2 files changed, 2 insertions(+), 15 deletions(-)
---
base-commit: 668d33c9ff922c4590c58754ab064aaf53c387dd
change-id: 20240709-dpu-fix-wb-6cd57e3eb182
Best regards,
--
Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>