From: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com>
CONFIG_PARAVIRT_XXL is mainly defined/used by XEN PV guests. For
other VM guest types, features supported under CONFIG_PARAVIRT
are self sufficient. CONFIG_PARAVIRT mainly provides support for
TLB flush operations and time related operations.
For TDX guest as well, paravirt calls under CONFIG_PARAVIRT meet
most of its requirements except the need of HLT and SAFE_HLT
paravirt calls, which are currently defined under
CONFIG_PARAVIRT_XXL.
Since enabling CONFIG_PARAVIRT_XXL is too bloated for TDX guest
like platforms, move HLT and SAFE_HLT paravirt calls under
CONFIG_PARAVIRT.
Moving HLT and SAFE_HLT paravirt calls is not fatal and should not
break any functionality for current users of CONFIG_PARAVIRT.
Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
Co-developed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Signed-off-by: Vishal Annapurve <vannapurve(a)google.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Andi Kleen <ak(a)linux.intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Tested-by: Ryan Afranji <afranji(a)google.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: stable(a)kernel.org
Link: https://lore.kernel.org/r/20250228014416.3925664-2-vannapurve@google.com
---
6.12.23 fails to build with the following error if CONFIG_XEN_PV is
not set:
arch/x86/coco/tdx/tdx.c: In function ‘tdx_early_init’:
arch/x86/coco/tdx/tdx.c:1080:19: error: ‘struct pv_irq_ops’ has no member
named ‘safe_halt’
1080 | pv_ops.irq.safe_halt = tdx_safe_halt;
| ^
arch/x86/coco/tdx/tdx.c:1081:19: error: ‘struct pv_irq_ops’ has no member
named ‘halt’
1081 | pv_ops.irq.halt = tdx_halt;
| ^
This is because XEN_PV selects PARAVIRT_XXL, and 'safe_halt' and
'halt' are only defined for pv_irq_ops if PARAVIRT_XXL is defined.
The build breakage was introduced in 6.12.23 by stable commit
805e3ce5e0e3 which is a backport of 9f98a4f4e721 ("x86/tdx: Fix
arch_safe_halt() execution for TDX VMs").
Consider picking up upstream commit 22cc5ca5de52 ("x86/paravirt:
Move halt paravirt calls under CONFIG_PARAVIRT") for stable 6.12.y
which fixes the build regression by moving 'safe_halt' and 'halt'
out from under the PARAVIRT_XXL config.
This patch is 22cc5ca5de52 backported to 6.12.23. There were a
couple of merge conflicts due to the missing upstream commits below:
29188c160061 ("x86/paravirt: Remove the WBINVD callback")
3101900218d7 ("x86/paravirt: Remove unused paravirt_disable_iospace()")
I wasn't sure if it was appropriate to pull those to stable as well
and the merge conflicts were trivial.
Thanks!
Signed-off-by: Brett Mastbergen <bmastbergen(a)ciq.com>
---
arch/x86/include/asm/irqflags.h | 40 +++++++++++++++++++----------------
arch/x86/include/asm/paravirt.h | 20 +++++++++---------
arch/x86/include/asm/paravirt_types.h | 3 +--
arch/x86/kernel/paravirt.c | 13 +++++++-----
4 files changed, 41 insertions(+), 35 deletions(-)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index cf7fc2b8e3ce1f4e5f5703ae9fbb5a7e1182ad4f..1c2db11a2c3cb9a289d80d4900b9933275d1eea6 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -76,6 +76,28 @@ static __always_inline void native_local_irq_restore(unsigned long flags)
#endif
+#ifndef CONFIG_PARAVIRT
+#ifndef __ASSEMBLY__
+/*
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+static __always_inline void arch_safe_halt(void)
+{
+ native_safe_halt();
+}
+
+/*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+static __always_inline void halt(void)
+{
+ native_halt();
+}
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PARAVIRT */
+
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/paravirt.h>
#else
@@ -97,24 +119,6 @@ static __always_inline void arch_local_irq_enable(void)
native_irq_enable();
}
-/*
- * Used in the idle loop; sti takes one instruction cycle
- * to complete:
- */
-static __always_inline void arch_safe_halt(void)
-{
- native_safe_halt();
-}
-
-/*
- * Used when interrupts are already enabled or to
- * shutdown the processor:
- */
-static __always_inline void halt(void)
-{
- native_halt();
-}
-
/*
* For spinlocks, etc:
*/
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index d4eb9e1d61b8ef8a3fc3a2510b0615ea93c11cb8..75d4c994f5e2a5dcbc2edbf7eed617de4a141fa0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -107,6 +107,16 @@ static inline void notify_page_enc_status_changed(unsigned long pfn,
PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
}
+static __always_inline void arch_safe_halt(void)
+{
+ PVOP_VCALL0(irq.safe_halt);
+}
+
+static inline void halt(void)
+{
+ PVOP_VCALL0(irq.halt);
+}
+
#ifdef CONFIG_PARAVIRT_XXL
static inline void load_sp0(unsigned long sp0)
{
@@ -170,16 +180,6 @@ static inline void __write_cr4(unsigned long x)
PVOP_VCALL1(cpu.write_cr4, x);
}
-static __always_inline void arch_safe_halt(void)
-{
- PVOP_VCALL0(irq.safe_halt);
-}
-
-static inline void halt(void)
-{
- PVOP_VCALL0(irq.halt);
-}
-
extern noinstr void pv_native_wbinvd(void);
static __always_inline void wbinvd(void)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8d4fbe1be489549ad33c968c2132bdbaf739b871..9334fdd1f6350231af7089802dfb94daa1653965 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -122,10 +122,9 @@ struct pv_irq_ops {
struct paravirt_callee_save save_fl;
struct paravirt_callee_save irq_disable;
struct paravirt_callee_save irq_enable;
-
+#endif
void (*safe_halt)(void);
void (*halt)(void);
-#endif
} __no_randomize_layout;
struct pv_mmu_ops {
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index fec38153355581215eb93b6301ae90b6f0bd06c5..0c1b915d7efac895b2c8be67eb3b84b998d00fbc 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -100,6 +100,11 @@ int paravirt_disable_iospace(void)
return request_resource(&ioport_resource, &reserve_ioports);
}
+static noinstr void pv_native_safe_halt(void)
+{
+ native_safe_halt();
+}
+
#ifdef CONFIG_PARAVIRT_XXL
static noinstr void pv_native_write_cr2(unsigned long val)
{
@@ -121,10 +126,6 @@ noinstr void pv_native_wbinvd(void)
native_wbinvd();
}
-static noinstr void pv_native_safe_halt(void)
-{
- native_safe_halt();
-}
#endif
struct pv_info pv_info = {
@@ -182,9 +183,11 @@ struct paravirt_patch_template pv_ops = {
.irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl),
.irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
.irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
+#endif /* CONFIG_PARAVIRT_XXL */
+
+ /* Irq HLT ops. */
.irq.safe_halt = pv_native_safe_halt,
.irq.halt = native_halt,
-#endif /* CONFIG_PARAVIRT_XXL */
/* Mmu ops. */
.mmu.flush_tlb_user = native_flush_tlb_local,
---
base-commit: 83b4161a63b87ce40d9f24f09b5b006f63d95b7c
change-id: 20250415-stable_fixup-c7b936473f53
Best regards,
--
Brett Mastbergen <bmastbergen(a)ciq.com>
The patch titled
Subject: mm: fix ratelimit_pages update error in dirty_ratio_handler()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-fix-ratelimit_pages-update-error-in-dirty_ratio_handler.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Jinliang Zheng <alexjlzheng(a)tencent.com>
Subject: mm: fix ratelimit_pages update error in dirty_ratio_handler()
Date: Tue, 15 Apr 2025 17:02:32 +0800
In dirty_ratio_handler(), vm_dirty_bytes must be set to zero before
calling writeback_set_ratelimit(), as global_dirty_limits() always
prioritizes the value of vm_dirty_bytes.
Otherwise, ratelimit_pages is still calculated from the old value of
vm_dirty_bytes, which is no longer valid.
Link: https://lkml.kernel.org/r/20250415090232.7544-1-alexjlzheng@tencent.com
Fixes: 9d823e8f6b1b ("writeback: per task dirty rate limit")
Signed-off-by: Jinliang Zheng <alexjlzheng(a)tencent.com>
Reviewed-by: MengEn Sun <mengensun(a)tencent.com>
Cc: Andrea Righi <andrea(a)betterlinux.com>
Cc: Fengguang Wu <fengguang.wu(a)intel.com>
Cc: Jinliang Zheng <alexjlzheng(a)tencent.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page-writeback.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/page-writeback.c~mm-fix-ratelimit_pages-update-error-in-dirty_ratio_handler
+++ a/mm/page-writeback.c
@@ -520,8 +520,8 @@ static int dirty_ratio_handler(const str
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
- writeback_set_ratelimit();
vm_dirty_bytes = 0;
+ writeback_set_ratelimit();
}
return ret;
}
_
Patches currently in -mm which might be from alexjlzheng(a)tencent.com are
mm-fix-ratelimit_pages-update-error-in-dirty_ratio_handler.patch
User is reporting what smells like notifier vs folio deadlock, where
migrate_pages_batch() on core kernel side is holding folio lock(s) and
then interacting with the mappings of it, however those mappings are
tied to some userptr, which means calling into the notifier callback and
grabbing the notifier lock. With perfect timing it looks possible that
the pages we pulled from the hmm fault can get sniped by
migrate_pages_batch() at the same time that we are holding the notifier
lock to mark the pages as accessed/dirty, but at this point we also want
to grab the folio lock(s) to mark them as dirty, but if they are
contended from notifier/migrate_pages_batch side then we deadlock since
folio lock won't be dropped until we drop the notifier lock.
Fortunately the mark_page_accessed/dirty is not really needed in the
first place it seems and should have already been done by hmm fault, so
just remove it.
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4765
Fixes: 0a98219bcc96 ("drm/xe/hmm: Don't dereference struct page pointers without notifier lock")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.10+
---
drivers/gpu/drm/xe/xe_hmm.c | 24 ------------------------
1 file changed, 24 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
index c3cc0fa105e8..57b71956ddf4 100644
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ b/drivers/gpu/drm/xe/xe_hmm.c
@@ -19,29 +19,6 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end)
return (end - start) >> PAGE_SHIFT;
}
-/**
- * xe_mark_range_accessed() - mark a range is accessed, so core mm
- * have such information for memory eviction or write back to
- * hard disk
- * @range: the range to mark
- * @write: if write to this range, we mark pages in this range
- * as dirty
- */
-static void xe_mark_range_accessed(struct hmm_range *range, bool write)
-{
- struct page *page;
- u64 i, npages;
-
- npages = xe_npages_in_range(range->start, range->end);
- for (i = 0; i < npages; i++) {
- page = hmm_pfn_to_page(range->hmm_pfns[i]);
- if (write)
- set_page_dirty_lock(page);
-
- mark_page_accessed(page);
- }
-}
-
static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
struct hmm_range *range, struct rw_semaphore *notifier_sem)
{
@@ -331,7 +308,6 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
if (ret)
goto out_unlock;
- xe_mark_range_accessed(&hmm_range, write);
userptr->sg = &userptr->sgt;
xe_hmm_userptr_set_mapped(uvma);
userptr->notifier_seq = hmm_range.notifier_seq;
--
2.49.0
Hi stable maintainers,
I have tried backporting some fixes to stable kernel 5.15.y which also
have CVE numbers and are fixing commits in 5.15.y.
I am not a subsystem expert and have only done overall testing that we
do for stable release candidate testing and not any patch specific testing.
Note: All these patches are present in 6.1.y.
Patch 1 -- minor conflicts resolved due to few missing commits.
Patch 2, 3, 4 -- clean cherry-picks from 6.1.y commits and will
therefore have additional SOBs from backporter/stable maintainers
Patch 5 -- Minor conflict resolved as 5.15.y doesn't have folios.
Patch 6 -- Resolve conflicts due to missing unrcu_pointer() helper and
other commit
Please let me know if there are any comments.
Thanks,
Harshit
Michal Schmidt (1):
bnxt_re: avoid shift undefined behavior in bnxt_qplib_alloc_init_hwq
Paolo Abeni (1):
ipv6: release nexthop on device removal
Rémi Denis-Courmont (1):
phonet/pep: fix racy skb_queue_empty() use
Souradeep Chakrabarti (1):
net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup
Trond Myklebust (1):
filemap: Fix bounds checking in filemap_read()
Wang Liang (1):
net: fix crash when config small gso_max_size/gso_ipv4_max_size
drivers/infiniband/hw/bnxt_re/qplib_fp.c | 3 +-
drivers/net/ethernet/microsoft/mana/mana.h | 2 +
drivers/net/ethernet/microsoft/mana/mana_en.c | 21 ++++++----
mm/filemap.c | 2 +-
net/core/rtnetlink.c | 2 +-
net/ipv6/route.c | 6 +--
net/phonet/pep.c | 41 +++++++++++++++----
7 files changed, 54 insertions(+), 23 deletions(-)
--
2.47.1
This series adds fine grained trap control in EL2 required for FEAT_PMUv3p9
registers like PMICNTR_EL0, PMICFILTR_EL0, and PMUACR_EL1 which are already
being used in the kernel. This is required to prevent their EL1 accesses from
trapping into EL2.
The following commits that enabled access into FEAT_PMUv3p9 registers have
already been merged upstream from 6.12 onwards.
d8226d8cfbaf ("perf: arm_pmuv3: Add support for Armv9.4 PMU instruction counter")
0bbff9ed8165 ("perf/arm_pmuv3: Add PMUv3.9 per counter EL0 access control")
The sysreg patches in this series are required for the final patch which
fixes the actual problem.
Changes in V2:
- Dropped [PATCH 1/8] perf/arm_pmuv3: Add PMUv3.9 per counter EL0 access control
- Folded ID_AA64DFR0_EL1_PMUVer_V3P9 definition in arch/arm64/tools/sysreg which
was added in commit 0bbff9ed8165 ("perf/arm_pmuv3: Add PMUv3.9 per counter EL0
access control")
Anshuman Khandual (7):
arm64/sysreg: Update register fields for ID_AA64MMFR0_EL1
arm64/sysreg: Add register fields for HDFGRTR2_EL2
arm64/sysreg: Add register fields for HDFGWTR2_EL2
arm64/sysreg: Add register fields for HFGITR2_EL2
arm64/sysreg: Add register fields for HFGRTR2_EL2
arm64/sysreg: Add register fields for HFGWTR2_EL2
arm64/boot: Enable EL2 requirements for FEAT_PMUv3p9
Documentation/arch/arm64/booting.rst | 22 ++++++
arch/arm64/include/asm/el2_setup.h | 25 +++++++
arch/arm64/tools/sysreg | 104 +++++++++++++++++++++++++++
3 files changed, 151 insertions(+)
--
2.30.2
This patch series addresses a shutdown issue reported in [1].
This problem has been fixed in kernel 6.12 and later. Kernel 6.6 is
the last kernel these upstream patches should go to, because the Realtek
RTL8852BE chip, supported by the kernel since v6.2, is the only chip known to
have this problem.
[1] https://github.com/lwfinger/rtw89/issues/372
Zenm Chen (2):
wifi: rtw89: pci: add pre_deinit to be called after probe complete
wifi: rtw89: pci: disable PCIE wake bit when PCIE deinit
drivers/net/wireless/realtek/rtw89/core.c | 2 ++
drivers/net/wireless/realtek/rtw89/core.h | 6 ++++++
drivers/net/wireless/realtek/rtw89/pci.c | 10 ++++++++++
3 files changed, 18 insertions(+)
--
2.49.0
[ Upstream commit 96da3f7d489d11b43e7c1af90d876b9a2492cca8 ]
The hash map is now fully converted to bpf_mem_alloc. Its implementation is not
allocating synchronously and not calling call_rcu() directly. It's now safe to
use non-preallocated hash maps in all types of tracing programs including
BPF_PROG_TYPE_PERF_EVENT that runs out of NMI context.
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Acked-by: Kumar Kartikeya Dwivedi <memxor(a)gmail.com>
Acked-by: Andrii Nakryiko <andrii(a)kernel.org>
Link: https://lore.kernel.org/bpf/20220902211058.60789-13-alexei.starovoitov@gmai…
Signed-off-by: Devaansh Kumar <devaanshk840(a)gmail.com>
---
kernel/bpf/verifier.c | 29 -----------------------------
1 file changed, 29 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7049a85a78ab..77a75ccaae5e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11700,35 +11700,6 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
{
enum bpf_prog_type prog_type = resolve_prog_type(prog);
- /*
- * Validate that trace type programs use preallocated hash maps.
- *
- * For programs attached to PERF events this is mandatory as the
- * perf NMI can hit any arbitrary code sequence.
- *
- * All other trace types using preallocated hash maps are unsafe as
- * well because tracepoint or kprobes can be inside locked regions
- * of the memory allocator or at a place where a recursion into the
- * memory allocator would see inconsistent state.
- *
- * On RT enabled kernels run-time allocation of all trace type
- * programs is strictly prohibited due to lock type constraints. On
- * !RT kernels it is allowed for backwards compatibility reasons for
- * now, but warnings are emitted so developers are made aware of
- * the unsafety and can fix their programs before this is enforced.
- */
- if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
- if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
- verbose(env, "perf_event programs can only use preallocated hash map\n");
- return -EINVAL;
- }
- if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
- verbose(env, "trace type programs can only use preallocated hash map\n");
- return -EINVAL;
- }
- WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
- verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
- }
if (map_value_has_spin_lock(map)) {
if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
--
2.49.0
From: Dan Carpenter <dan.carpenter(a)linaro.org>
[ Upstream commit e56aac6e5a25630645607b6856d4b2a17b2311a5 ]
The "command" variable can be controlled by the user via debugfs. The
worry is that if con_index is zero then "&uc->ucsi->connector[con_index
- 1]" would be an array underflow.
Fixes: 170a6726d0e2 ("usb: typec: ucsi: add support for separate DP altmode devices")
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Link: https://lore.kernel.org/r/c69ef0b3-61b0-4dde-98dd-97b97f81d912@stanley.moun…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
[Minor context change fixed.
13f2ec3115c8 ("usb: typec: ucsi: simplify command sending API") rename
ucsi_ccg_sync_write to ucsi_ccg_sync_control in v6.11, so this patch is
applied in ucsi_ccg_sync_write in v6.6.]
Signed-off-by: Bin Lan <bin.lan.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Build test passed.
---
drivers/usb/typec/ucsi/ucsi_ccg.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c
index 7c7f388aac96..3ef02d35bfbe 100644
--- a/drivers/usb/typec/ucsi/ucsi_ccg.c
+++ b/drivers/usb/typec/ucsi/ucsi_ccg.c
@@ -585,6 +585,10 @@ static int ucsi_ccg_sync_write(struct ucsi *ucsi, unsigned int offset,
uc->has_multiple_dp) {
con_index = (uc->last_cmd_sent >> 16) &
UCSI_CMD_CONNECTOR_MASK;
+ if (con_index == 0) {
+ ret = -EINVAL;
+ goto unlock;
+ }
con = &uc->ucsi->connector[con_index - 1];
ucsi_ccg_update_set_new_cam_cmd(uc, con, (u64 *)val);
}
@@ -600,6 +604,7 @@ static int ucsi_ccg_sync_write(struct ucsi *ucsi, unsigned int offset,
err_clear_bit:
clear_bit(DEV_CMD_PENDING, &uc->flags);
pm_runtime_put_sync(uc->dev);
+unlock:
mutex_unlock(&uc->lock);
return ret;
--
2.34.1
From: Boris Burkov <boris(a)bur.io>
[ Upstream commit 30479f31d44d47ed00ae0c7453d9b253537005b2 ]
In the buffered write path, the dirty page owns the qgroup reserve until
it creates an ordered_extent.
Therefore, any errors that occur before the ordered_extent is created
must free that reservation, or else the space is leaked. The fstest
generic/475 exercises various IO error paths, and is able to trigger
errors in cow_file_range where we fail to get to allocating the ordered
extent. Note that because we *do* clear delalloc, we are likely to
remove the inode from the delalloc list, so the inodes/pages do not have
invalidate/launder called on them in the commit abort path.
This results in failures at the unmount stage of the test that look like:
BTRFS: error (device dm-8 state EA) in cleanup_transaction:2018: errno=-5 IO failure
BTRFS: error (device dm-8 state EA) in btrfs_replace_file_extents:2416: errno=-5 IO failure
BTRFS warning (device dm-8 state EA): qgroup 0/5 has unreleased space, type 0 rsv 28672
------------[ cut here ]------------
WARNING: CPU: 3 PID: 22588 at fs/btrfs/disk-io.c:4333 close_ctree+0x222/0x4d0 [btrfs]
Modules linked in: btrfs blake2b_generic libcrc32c xor zstd_compress raid6_pq
CPU: 3 PID: 22588 Comm: umount Kdump: loaded Tainted: G W 6.10.0-rc7-gab56fde445b8 #21
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
RIP: 0010:close_ctree+0x222/0x4d0 [btrfs]
RSP: 0018:ffffb4465283be00 EFLAGS: 00010202
RAX: 0000000000000001 RBX: ffffa1a1818e1000 RCX: 0000000000000001
RDX: 0000000000000000 RSI: ffffb4465283bbe0 RDI: ffffa1a19374fcb8
RBP: ffffa1a1818e13c0 R08: 0000000100028b16 R09: 0000000000000000
R10: 0000000000000003 R11: 0000000000000003 R12: ffffa1a18ad7972c
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
FS: 00007f9168312b80(0000) GS:ffffa1a4afcc0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f91683c9140 CR3: 000000010acaa000 CR4: 00000000000006f0
Call Trace:
<TASK>
? close_ctree+0x222/0x4d0 [btrfs]
? __warn.cold+0x8e/0xea
? close_ctree+0x222/0x4d0 [btrfs]
? report_bug+0xff/0x140
? handle_bug+0x3b/0x70
? exc_invalid_op+0x17/0x70
? asm_exc_invalid_op+0x1a/0x20
? close_ctree+0x222/0x4d0 [btrfs]
generic_shutdown_super+0x70/0x160
kill_anon_super+0x11/0x40
btrfs_kill_super+0x11/0x20 [btrfs]
deactivate_locked_super+0x2e/0xa0
cleanup_mnt+0xb5/0x150
task_work_run+0x57/0x80
syscall_exit_to_user_mode+0x121/0x130
do_syscall_64+0xab/0x1a0
entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f916847a887
---[ end trace 0000000000000000 ]---
BTRFS error (device dm-8 state EA): qgroup reserved space leaked
Cases 2 and 3 in the out_reserve path both pertain to this type of leak
and must free the reserved qgroup data. Because it is already an error
path, I opted not to handle the possible errors in
btrfs_free_qgroup_data.
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: David Sterba <dsterba(a)suse.com>
[Minor conflict resolved due to code context change.]
Signed-off-by: Jianqi Ren <jianqi.ren.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Verified the build test
---
fs/btrfs/inode.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cedffa567a75..fc127182067b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1546,6 +1546,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
locked_page,
clear_bits,
page_ops);
+ btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
start += cur_alloc_size;
}
@@ -1559,6 +1560,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
clear_bits |= EXTENT_CLEAR_DATA_RESV;
extent_clear_unlock_delalloc(inode, start, end, locked_page,
clear_bits, page_ops);
+ btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
}
return ret;
}
@@ -2222,13 +2224,15 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
*/
if (cow_start != (u64)-1)
cur_offset = cow_start;
- if (cur_offset < end)
+ if (cur_offset < end) {
extent_clear_unlock_delalloc(inode, cur_offset, end,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
+ btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL);
+ }
btrfs_free_path(path);
return ret;
}
--
2.34.1