When operating in High-Speed, it is observed that DSTS[USBLNKST] doesn't
update the link state immediately after the wakeup interrupt is received.
Since the wakeup event handler calls the resume callbacks, there is a
chance that a function driver performs an ep queue, which in turn tries to
perform remote wakeup from send_gadget_ep_cmd(STARTXFER). This happens
because DSTS[21:18] hasn't been updated to U0 yet; the latency of DSTS
updates is observed to be on the order of milliseconds. Hence avoid calling
gadget_wakeup during startxfer to prevent unnecessarily issuing remote
wakeup to the host.
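For reference, the link state consulted in this path is read straight from
DSTS; a minimal sketch of that read (mirroring dwc3_gadget_get_link_state()
in gadget.c and the existing DWC3_DSTS_USBLNKST() helper, shown here only
for illustration) is:

	static int dwc3_gadget_get_link_state(struct dwc3 *dwc)
	{
		u32 reg = dwc3_readl(dwc->regs, DWC3_DSTS);

		/* DSTS[21:18] reports the current USB link state */
		return DWC3_DSTS_USBLNKST(reg);
	}

In HS operation this value can lag the actual link state by milliseconds
after the wakeup event, which is why it cannot be trusted here to decide
whether a remote wakeup is needed.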
Fixes: c36d8e947a56 ("usb: dwc3: gadget: put link to U0 before Start Transfer")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Prashanth K <quic_prashk(a)quicinc.com>
---
v4: Reworded the comment in the function definition.
v3: Added notes on top of the function definition.
v2: Refactored the patch as suggested in the v1 discussion.
drivers/usb/dwc3/gadget.c | 38 ++++++++++++++------------------------
1 file changed, 14 insertions(+), 24 deletions(-)
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 89fc690fdf34..ea583d24aa37 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -287,6 +287,20 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async);
*
* Caller should handle locking. This function will issue @cmd with given
* @params to @dep and wait for its completion.
+ *
+ * According to the databook, if the link is in L1/L2/U3 when the StartXfer
+ * command is issued, the command may not complete and may time out, so
+ * software must bring the link back to the ON state via remote wakeup.
+ * However, issuing a command at USB2 speeds requires clearing
+ * GUSB2PHYCFG.SUSPENDUSB2, which turns on the signal needed to complete the
+ * command (usually within 50us), within the command timeout set by the
+ * driver. Hence we don't expect to trigger a remote wakeup from here;
+ * instead it should be done by the wakeup ops.
+ *
+ * Special note: if the wakeup ops is used for remote wakeup, take care when
+ * a StartXfer command must be sent soon after. The wakeup ops is
+ * asynchronous and the link state may not have transitioned to ON yet; and
+ * after receiving the wakeup event, the device is no longer in U3, so any
+ * subsequent link transition needs to be addressed with the wakeup ops.
*/
int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
struct dwc3_gadget_ep_cmd_params *params)
@@ -327,30 +341,6 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
}
- if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) {
- int link_state;
-
- /*
- * Initiate remote wakeup if the link state is in U3 when
- * operating in SS/SSP or L1/L2 when operating in HS/FS. If the
- * link state is in U1/U2, no remote wakeup is needed. The Start
- * Transfer command will initiate the link recovery.
- */
- link_state = dwc3_gadget_get_link_state(dwc);
- switch (link_state) {
- case DWC3_LINK_STATE_U2:
- if (dwc->gadget->speed >= USB_SPEED_SUPER)
- break;
-
- fallthrough;
- case DWC3_LINK_STATE_U3:
- ret = __dwc3_gadget_wakeup(dwc, false);
- dev_WARN_ONCE(dwc->dev, ret, "wakeup failed --> %d\n",
- ret);
- break;
- }
- }
-
/*
* For some commands such as Update Transfer command, DEPCMDPARn
* registers are reserved. Since the driver often sends Update Transfer
--
2.25.1
Memory access #VEs are hard for Linux to handle in contexts like the
entry code or NMIs. But other OSes need them for functionality.
There's a static (pre-guest-boot) way for a VMM to choose one or the
other. But VMMs don't always know which OS they are booting, so they
choose to deliver those #VEs so the "other" OSes will work. That,
unfortunately, has left us in the lurch and exposed to these
hard-to-handle #VEs.
The TDX module has introduced a new feature. Even if the static
configuration is "send nasty #VEs", the kernel can dynamically request
that they be disabled.
Check if the feature is available and disable SEPT #VE if possible.
If the TD is allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
attribute is no longer reliable. It reflects the initial state of the
control for the TD, but it will not be updated if someone (e.g. the
bootloader) changes it before the kernel starts. The kernel must check the
TDCS_TD_CTLS bit to determine if SEPT #VEs are enabled or disabled.
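Condensed, the dynamic part of the flow added below boils down to the
following sketch, using the tdg_vm_rd()/tdg_vm_wr() helpers from this
patch:

	u64 controls;

	/* Was SEPT #VE already disabled, e.g. by the bootloader? */
	tdg_vm_rd(TDCS_TD_CTLS, &controls);
	if (!(controls & TD_CTLS_PENDING_VE_DISABLE))
		tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_PENDING_VE_DISABLE,
			  TD_CTLS_PENDING_VE_DISABLE);

The full disable_sept_ve() below additionally checks TDCS_CONFIG_FLAGS for
the capability and keeps #VEs enabled for debug TDs.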
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Fixes: 373e715e31bf ("x86/tdx: Panic on bad configs that #VE on "private" memory access")
Cc: stable(a)vger.kernel.org
---
arch/x86/coco/tdx/tdx.c | 76 ++++++++++++++++++++++++-------
arch/x86/include/asm/shared/tdx.h | 10 +++-
2 files changed, 69 insertions(+), 17 deletions(-)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 08ce488b54d0..ba3103877b21 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -78,7 +78,7 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
}
/* Read TD-scoped metadata */
-static inline u64 __maybe_unused tdg_vm_rd(u64 field, u64 *value)
+static inline u64 tdg_vm_rd(u64 field, u64 *value)
{
struct tdx_module_args args = {
.rdx = field,
@@ -193,6 +193,62 @@ static void __noreturn tdx_panic(const char *msg)
__tdx_hypercall(&args);
}
+/*
+ * The kernel cannot handle #VEs when accessing normal kernel memory. Ensure
+ * that no #VE will be delivered for accesses to TD-private memory.
+ *
+ * TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
+ * controls if the guest will receive such #VE with TD attribute
+ * ATTR_SEPT_VE_DISABLE.
+ *
+ * Newer TDX module allows the guest to control if it wants to receive SEPT
+ * violation #VEs.
+ *
+ * Check if the feature is available and disable SEPT #VE if possible.
+ *
+ * If the TD is allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
+ * attribute is no longer reliable. It reflects the initial state of the
+ * control for the TD, but it will not be updated if someone (e.g. bootloader)
+ * changes it before the kernel starts. The kernel must check TDCS_TD_CTLS to
+ * determine if SEPT #VEs are enabled or disabled.
+ */
+static void disable_sept_ve(u64 td_attr)
+{
+ const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
+ bool debug = td_attr & ATTR_DEBUG;
+ u64 config, controls;
+
+ /* Is this TD allowed to disable SEPT #VE */
+ tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
+ if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) {
+ /* No SEPT #VE controls for the guest: check the attribute */
+ if (td_attr & ATTR_SEPT_VE_DISABLE)
+ return;
+
+ /* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
+ if (debug)
+ pr_warn("%s\n", msg);
+ else
+ tdx_panic(msg);
+ return;
+ }
+
+ /* Check if SEPT #VE has been disabled before us */
+ tdg_vm_rd(TDCS_TD_CTLS, &controls);
+ if (controls & TD_CTLS_PENDING_VE_DISABLE)
+ return;
+
+ /* Keep #VEs enabled for splats in debugging environments */
+ if (debug)
+ return;
+
+ /* Disable SEPT #VEs */
+ tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_PENDING_VE_DISABLE,
+ TD_CTLS_PENDING_VE_DISABLE);
+
+ return;
+}
+
static void tdx_setup(u64 *cc_mask)
{
struct tdx_module_args args = {};
@@ -218,24 +274,12 @@ static void tdx_setup(u64 *cc_mask)
gpa_width = args.rcx & GENMASK(5, 0);
*cc_mask = BIT_ULL(gpa_width - 1);
+ td_attr = args.rdx;
+
/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
- /*
- * The kernel can not handle #VE's when accessing normal kernel
- * memory. Ensure that no #VE will be delivered for accesses to
- * TD-private memory. Only VMM-shared memory (MMIO) will #VE.
- */
- td_attr = args.rdx;
- if (!(td_attr & ATTR_SEPT_VE_DISABLE)) {
- const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set.";
-
- /* Relax SEPT_VE_DISABLE check for debug TD. */
- if (td_attr & ATTR_DEBUG)
- pr_warn("%s\n", msg);
- else
- tdx_panic(msg);
- }
+ disable_sept_ve(td_attr);
}
/*
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index 7e12cfa28bec..fecb2a6e864b 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -19,9 +19,17 @@
#define TDG_VM_RD 7
#define TDG_VM_WR 8
-/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
+/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
+#define TDCS_CONFIG_FLAGS 0x1110000300000016
+#define TDCS_TD_CTLS 0x1110000300000017
#define TDCS_NOTIFY_ENABLES 0x9100000000000010
+/* TDCS_CONFIG_FLAGS bits */
+#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1)
+
+/* TDCS_TD_CTLS bits */
+#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(0)
+
/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA 0x10001
#define TDVMCALL_GET_QUOTE 0x10002
--
2.43.0
Here are different fixes:
Patch 1 closes the subflow after having received a FIN, instead of
leaving it half-closed until the end of the MPTCP connection. A fix for
v5.12.
Patch 2 validates the previous patch.
Patch 3 is a fix for a recent fix to check both directions for the
backup flag. It can follow the 'Fixes' commit and be backported up to
v5.7.
Patch 4 adds a missing \n at the end of pr_debug(), causing debug
messages to be displayed with a delay, which confuses the debugger. A
fix for v5.6.
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Note: Peter's email address has been removed from the Cc list, because
it is bouncing.
---
Matthieu Baerts (NGI0) (4):
mptcp: close subflow when receiving TCP+FIN
selftests: mptcp: join: cannot rm sf if closed
mptcp: sched: check both backup in retrans
mptcp: pr_debug: add missing \n at the end
net/mptcp/fastopen.c | 4 +-
net/mptcp/options.c | 50 ++++++++++-----------
net/mptcp/pm.c | 28 ++++++------
net/mptcp/pm_netlink.c | 20 ++++-----
net/mptcp/protocol.c | 59 +++++++++++++------------
net/mptcp/protocol.h | 4 +-
net/mptcp/sched.c | 4 +-
net/mptcp/sockopt.c | 4 +-
net/mptcp/subflow.c | 56 ++++++++++++-----------
tools/testing/selftests/net/mptcp/mptcp_join.sh | 11 ++---
10 files changed, 122 insertions(+), 118 deletions(-)
---
base-commit: 31a972959ae57691a1e4f539399b2674ae576086
change-id: 20240826-net-mptcp-close-extra-sf-fin-19d4e5aa4c9c
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
In psnet_open_pf_bar() and snet_open_vf_bar() a string later passed to
pcim_iomap_regions() is placed on the stack. Neither
pcim_iomap_regions() nor the functions it calls copy that string.
If the string is ever used later, this causes undefined behavior, since
the stack frame will be long gone by then.
Fix the bug by allocating the strings on the heap through
devm_kasprintf().
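For illustration, the hazardous pattern looks roughly like this (demo() is
a hypothetical helper, not part of the driver):

	static int demo(struct pci_dev *pdev)
	{
		char name[50];	/* dies when demo() returns */

		snprintf(name, sizeof(name), "demo[%s]", pci_name(pdev));
		/* pcim_iomap_regions() retains the pointer past this call */
		return pcim_iomap_regions(pdev, BIT(0), name);
	}

devm_kasprintf() instead allocates the string with device-managed lifetime,
so it stays valid as long as the devres-tracked mapping does.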
Cc: stable(a)vger.kernel.org # v6.3
Fixes: 51a8f9d7f587 ("virtio: vdpa: new SolidNET DPU driver.")
Reported-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Closes: https://lore.kernel.org/all/74e9109a-ac59-49e2-9b1d-d825c9c9f891@wanadoo.fr/
Suggested-by: Andy Shevchenko <andy(a)kernel.org>
Signed-off-by: Philipp Stanner <pstanner(a)redhat.com>
---
drivers/vdpa/solidrun/snet_main.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c
index 99428a04068d..c8b74980dbd1 100644
--- a/drivers/vdpa/solidrun/snet_main.c
+++ b/drivers/vdpa/solidrun/snet_main.c
@@ -555,7 +555,7 @@ static const struct vdpa_config_ops snet_config_ops = {
static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
{
- char name[50];
+ char *name;
int ret, i, mask = 0;
/* We don't know which BAR will be used to communicate..
* We will map every bar with len > 0.
@@ -573,7 +573,10 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
return -ENODEV;
}
- snprintf(name, sizeof(name), "psnet[%s]-bars", pci_name(pdev));
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "psnet[%s]-bars", pci_name(pdev));
+ if (!name)
+ return -ENOMEM;
+
ret = pcim_iomap_regions(pdev, mask, name);
if (ret) {
SNET_ERR(pdev, "Failed to request and map PCI BARs\n");
@@ -590,10 +593,13 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
static int snet_open_vf_bar(struct pci_dev *pdev, struct snet *snet)
{
- char name[50];
+ char *name;
int ret;
- snprintf(name, sizeof(name), "snet[%s]-bar", pci_name(pdev));
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "snet[%s]-bar", pci_name(pdev));
+ if (!name)
+ return -ENOMEM;
+
/* Request and map BAR */
ret = pcim_iomap_regions(pdev, BIT(snet->psnet->cfg.vf_bar), name);
if (ret) {
--
2.46.0
From: Li RongQing <lirongqing(a)baidu.com>
[ Upstream commit 8e6ed96cdd5001c55fccc80a17f651741c1ca7d2 ]
When the vCPU is migrated, if its timer has expired, KVM _should_ fire
the timer ASAP; zeroing the deadline here will cause the timer to
fire immediately on the destination.
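Condensed, the change below makes the restored deadline depend on the timer
mode (a sketch of the diff, not new logic):

	if (unlikely(deadline <= 0))
		deadline = apic_lvtt_period(apic) ? apic->lapic_timer.period : 0;

That is, only periodic timers fall back to the period; an expired one-shot
timer keeps a zero deadline and fires immediately on the destination.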
Cc: Sean Christopherson <seanjc(a)google.com>
Cc: Peter Shier <pshier(a)google.com>
Cc: Jim Mattson <jmattson(a)google.com>
Cc: Wanpeng Li <wanpengli(a)tencent.com>
Cc: Paolo Bonzini <pbonzini(a)redhat.com>
Signed-off-by: Li RongQing <lirongqing(a)baidu.com>
Link: https://lore.kernel.org/r/20230106040625.8404-1-lirongqing@baidu.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
(cherry picked from commit 8e6ed96cdd5001c55fccc80a17f651741c1ca7d2)
The code was able to compile without errors or warnings.
Signed-off-by: David Hunter <david.hunter.linux(a)gmail.com>
---
arch/x86/kvm/lapic.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c90fef0258c5..3cd590ace95a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1843,8 +1843,12 @@ static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
if (unlikely(count_reg != APIC_TMICT)) {
deadline = tmict_to_ns(apic,
kvm_lapic_get_reg(apic, count_reg));
- if (unlikely(deadline <= 0))
- deadline = apic->lapic_timer.period;
+ if (unlikely(deadline <= 0)) {
+ if (apic_lvtt_period(apic))
+ deadline = apic->lapic_timer.period;
+ else
+ deadline = 0;
+ }
else if (unlikely(deadline > apic->lapic_timer.period)) {
pr_info_ratelimited(
"kvm: vcpu %i: requested lapic timer restore with "
--
2.43.0
Two enclave threads may try to add and remove the same enclave page
simultaneously (e.g., if the SGX runtime supports both lazy allocation
and MADV_DONTNEED semantics). Consider some enclave page added to the
enclave. User space decides to temporarily remove this page (e.g.,
emulating the MADV_DONTNEED semantics) on CPU1. At the same time, user
space performs a memory access on the same page on CPU2, which results
in a #PF and ultimately in sgx_vma_fault(). The scenario proceeds as
follows:
/*
* CPU1: User space performs
* ioctl(SGX_IOC_ENCLAVE_REMOVE_PAGES)
* on enclave page X
*/
sgx_encl_remove_pages() {
mutex_lock(&encl->lock);
entry = sgx_encl_load_page(encl);
/*
* verify that page is
* trimmed and accepted
*/
mutex_unlock(&encl->lock);
/*
* remove PTE entry; cannot
* be performed under lock
*/
sgx_zap_enclave_ptes(encl);
/*
* Fault on CPU2 on same page X
*/
sgx_vma_fault() {
/*
* PTE entry was removed, but the
* page is still in enclave's xarray
*/
xa_load(&encl->page_array) != NULL ->
/*
* SGX driver thinks that this page
* was swapped out and loads it
*/
mutex_lock(&encl->lock);
/*
* this is effectively a no-op
*/
entry = sgx_encl_load_page_in_vma();
/*
* add PTE entry
*
* *BUG*: a PTE is installed for a
* page in process of being removed
*/
vmf_insert_pfn(...);
mutex_unlock(&encl->lock);
return VM_FAULT_NOPAGE;
}
/*
* continue with page removal
*/
mutex_lock(&encl->lock);
sgx_encl_free_epc_page(epc_page) {
/*
* remove page via EREMOVE
*/
/*
* free EPC page
*/
sgx_free_epc_page(epc_page);
}
xa_erase(&encl->page_array);
mutex_unlock(&encl->lock);
}
Here, CPU1 removed the page. However, CPU2 installed the PTE entry on the
same page. This enclave page becomes perpetually inaccessible (until
another SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl). This is because the page is
marked accessible in the PTE entry but is not EAUGed, and any subsequent
access to this page raises a fault: with the kernel believing there to
be a valid VMA, the unlikely error code X86_PF_SGX encountered by code
path do_user_addr_fault() -> access_error() causes the SGX driver's
sgx_vma_fault() to be skipped and user space receives a SIGSEGV instead.
The userspace SIGSEGV handler cannot perform EACCEPT because the page
was not EAUGed. Thus, user space is stuck with the inaccessible
page.
Fix this race by forcing the fault handler on CPU2 to back off if the
page is currently being removed (on CPU1). This is achieved by
setting the SGX_ENCL_PAGE_BUSY flag right before the first mutex_unlock() in
sgx_encl_remove_pages(). Upon loading the page, CPU2 checks whether this
page is busy, and if yes then CPU2 backs off and waits until the page is
completely removed. After that, any memory access to this page results
in a normal "allocate and EAUG a page on #PF" flow.
Additionally fix a similar race: user space converts a normal enclave
page to a TCS page (via SGX_IOC_ENCLAVE_MODIFY_TYPES) on CPU1, and at
the same time, user space performs a memory access on the same page on
CPU2. This fix is not strictly necessary (this particular race would
indicate a bug in a user space application), but it gives a consistent
rule: if an enclave page is under certain operation by the kernel with
the mapping removed, then other threads trying to access that page are
temporarily blocked and should retry.
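For illustration, the back-off on the fault side reuses the existing busy
check in __sgx_encl_load_page() (a sketch; the -EBUSY is translated into
VM_FAULT_NOPAGE by the fault handler, so the access is simply retried):

	/* Entry successfully located. */
	if (entry->epc_page) {
		if (entry->desc & SGX_ENCL_PAGE_BUSY)
			return ERR_PTR(-EBUSY);	/* removal in progress */

		return entry;
	}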
Fixes: 9849bb27152c ("x86/sgx: Support complete page removal")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii(a)intel.com>
---
arch/x86/kernel/cpu/sgx/encl.h | 3 ++-
arch/x86/kernel/cpu/sgx/ioctl.c | 17 +++++++++++++++++
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index b566b8ad5f33..96b11e8fb770 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -22,7 +22,8 @@
/* 'desc' bits holding the offset in the VA (version array) page. */
#define SGX_ENCL_PAGE_VA_OFFSET_MASK GENMASK_ULL(11, 3)
-/* 'desc' bit indicating that the page is busy (being reclaimed). */
+/* 'desc' bit indicating that the page is busy (being reclaimed, removed or
+ * converted to a TCS page). */
#define SGX_ENCL_PAGE_BUSY BIT(2)
/*
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 5d390df21440..ee619f2b3414 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -969,12 +969,22 @@ static long sgx_enclave_modify_types(struct sgx_encl *encl,
/*
* Do not keep encl->lock because of dependency on
* mmap_lock acquired in sgx_zap_enclave_ptes().
+ *
+ * Releasing encl->lock leads to a data race: while CPU1
+ * performs sgx_zap_enclave_ptes() and removes the PTE
+ * entry for the enclave page, CPU2 may attempt to load
+ * this page (because the page is still in enclave's
+ * xarray). To prevent CPU2 from loading the page, mark
+ * the page as busy before unlock and unmark after lock
+ * again.
*/
+ entry->desc |= SGX_ENCL_PAGE_BUSY;
mutex_unlock(&encl->lock);
sgx_zap_enclave_ptes(encl, addr);
mutex_lock(&encl->lock);
+ entry->desc &= ~SGX_ENCL_PAGE_BUSY;
sgx_mark_page_reclaimable(entry->epc_page);
}
@@ -1141,7 +1151,14 @@ static long sgx_encl_remove_pages(struct sgx_encl *encl,
/*
* Do not keep encl->lock because of dependency on
* mmap_lock acquired in sgx_zap_enclave_ptes().
+ *
+ * Releasing encl->lock leads to a data race: while CPU1
+ * performs sgx_zap_enclave_ptes() and removes the PTE entry
+ * for the enclave page, CPU2 may attempt to load this page
+ * (because the page is still in enclave's xarray). To prevent
+ * CPU2 from loading the page, mark the page as busy.
*/
+ entry->desc |= SGX_ENCL_PAGE_BUSY;
mutex_unlock(&encl->lock);
sgx_zap_enclave_ptes(encl, addr);
--
2.43.0
Imagine an mmap()'d file. Two threads touch the same address at the same
time and fault. Both allocate a physical page and race to install a PTE
for that page. Only one will win the race. The loser frees its page, but
still continues handling the fault as a success and returns
VM_FAULT_NOPAGE from the fault handler.
The same race can happen with SGX. But there's a bug: the loser in the
SGX case steers into a failure path. The loser EREMOVE's the winner's EPC
page, then returns SIGBUS, likely killing the app.
Fix the SGX loser's behavior: check whether another thread already
allocated the page, and if so, return VM_FAULT_NOPAGE.
The race can be illustrated as follows:
/* /*
* Fault on CPU1 * Fault on CPU2
* on enclave page X * on enclave page X
*/ */
sgx_vma_fault() { sgx_vma_fault() {
xa_load(&encl->page_array) xa_load(&encl->page_array)
== NULL --> == NULL -->
sgx_encl_eaug_page() { sgx_encl_eaug_page() {
... ...
/* /*
* alloc encl_page * alloc encl_page
*/ */
mutex_lock(&encl->lock);
/*
* alloc EPC page
*/
epc_page = sgx_alloc_epc_page(...);
/*
* add page to enclave's xarray
*/
xa_insert(&encl->page_array, ...);
/*
* add page to enclave via EAUG
* (page is in pending state)
*/
/*
* add PTE entry
*/
vmf_insert_pfn(...);
mutex_unlock(&encl->lock);
return VM_FAULT_NOPAGE;
}
}
/*
* All good up to here: enclave page
* successfully added to enclave,
* ready for EACCEPT from user space
*/
mutex_lock(&encl->lock);
/*
* alloc EPC page
*/
epc_page = sgx_alloc_epc_page(...);
/*
* add page to enclave's xarray,
* this fails with -EBUSY as this
* page was already added by CPU2
*/
xa_insert(&encl->page_array, ...);
err_out_shrink:
sgx_encl_free_epc_page(epc_page) {
/*
* remove page via EREMOVE
*
* *BUG*: page added by CPU2 is
* yanked from enclave while it
* remains accessible from OS
* perspective (PTE installed)
*/
/*
* free EPC page
*/
sgx_free_epc_page(epc_page);
}
mutex_unlock(&encl->lock);
/*
* *BUG*: SIGBUS is returned
* for a valid enclave page
*/
return VM_FAULT_SIGBUS;
}
}
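The fix re-checks the xarray under encl->lock before allocating anything;
roughly, the new sgx_encl_eaug_page() prologue (sketched here, see the diff
for the exact goto-based form) is:

	mutex_lock(&encl->lock);

	/* Did another thread already fault in this page? */
	encl_page = xa_load(&encl->page_array, PFN_DOWN(addr));
	if (encl_page) {
		mutex_unlock(&encl->lock);
		return VM_FAULT_NOPAGE;	/* PTE is already installed */
	}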
Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
Cc: stable(a)vger.kernel.org
Reported-by: Marcelina Kościelnicka <mwk(a)invisiblethingslab.com>
Suggested-by: Kai Huang <kai.huang(a)intel.com>
Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii(a)intel.com>
---
arch/x86/kernel/cpu/sgx/encl.c | 36 ++++++++++++++++++++--------------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index c0a3c00284c8..2aa7ced0e4a0 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -337,6 +337,16 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
return VM_FAULT_SIGBUS;
+ mutex_lock(&encl->lock);
+
+ /*
+ * Multiple threads may try to fault on the same page concurrently.
+ * Re-check if another thread has already done that.
+ */
+ encl_page = xa_load(&encl->page_array, PFN_DOWN(addr));
+ if (encl_page)
+ goto done;
+
/*
* Ignore internal permission checking for dynamically added pages.
* They matter only for data added during the pre-initialization
@@ -345,23 +355,23 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
*/
secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
- if (IS_ERR(encl_page))
- return VM_FAULT_OOM;
-
- mutex_lock(&encl->lock);
+ if (IS_ERR(encl_page)) {
+ vmret = VM_FAULT_OOM;
+ goto err_out_unlock;
+ }
epc_page = sgx_encl_load_secs(encl);
if (IS_ERR(epc_page)) {
if (PTR_ERR(epc_page) == -EBUSY)
vmret = VM_FAULT_NOPAGE;
- goto err_out_unlock;
+ goto err_out_encl;
}
epc_page = sgx_alloc_epc_page(encl_page, false);
if (IS_ERR(epc_page)) {
if (PTR_ERR(epc_page) == -EBUSY)
vmret = VM_FAULT_NOPAGE;
- goto err_out_unlock;
+ goto err_out_encl;
}
va_page = sgx_encl_grow(encl, false);
@@ -376,10 +386,6 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
encl_page, GFP_KERNEL);
- /*
- * If ret == -EBUSY then page was created in another flow while
- * running without encl->lock
- */
if (ret)
goto err_out_shrink;
@@ -389,7 +395,7 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
if (ret)
- goto err_out;
+ goto err_out_eaug;
encl_page->encl = encl;
encl_page->epc_page = epc_page;
@@ -408,20 +414,20 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
mutex_unlock(&encl->lock);
return VM_FAULT_SIGBUS;
}
+done:
mutex_unlock(&encl->lock);
return VM_FAULT_NOPAGE;
-err_out:
+err_out_eaug:
xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
-
err_out_shrink:
sgx_encl_shrink(encl, va_page);
err_out_epc:
sgx_encl_free_epc_page(epc_page);
+err_out_encl:
+ kfree(encl_page);
err_out_unlock:
mutex_unlock(&encl->lock);
- kfree(encl_page);
-
return vmret;
}
--
2.43.0
The page reclaimer thread sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when
the enclave page is being reclaimed (moved to the backing store). This
flag, however, has two logical meanings:
1. Don't attempt to load the enclave page (the page is busy), see
__sgx_encl_load_page().
2. Don't attempt to remove the PCMD page corresponding to this enclave
page (the PCMD page is busy), see reclaimer_writing_to_pcmd().
To reflect these two meanings, split SGX_ENCL_PAGE_BEING_RECLAIMED into
two flags: SGX_ENCL_PAGE_BUSY and SGX_ENCL_PAGE_PCMD_BUSY. Currently,
both flags are set only when the enclave page is being reclaimed (by the
page reclaimer thread). A future commit will introduce new cases when
the enclave page is being operated on; these new cases will set only the
SGX_ENCL_PAGE_BUSY flag.
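After the split, the reclaimer sets and clears both bits around the
write-back, as in this condensed view of the main.c change:

	/* mark the page and its PCMD busy before reclaiming... */
	encl_page->desc |= SGX_ENCL_PAGE_BUSY | SGX_ENCL_PAGE_PCMD_BUSY;

	/* ...and drop both once the page is written back via EWB */
	encl_page->desc &= ~(SGX_ENCL_PAGE_BUSY | SGX_ENCL_PAGE_PCMD_BUSY);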
Cc: stable(a)vger.kernel.org
Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii(a)intel.com>
Reviewed-by: Haitao Huang <haitao.huang(a)linux.intel.com>
Acked-by: Kai Huang <kai.huang(a)intel.com>
---
arch/x86/kernel/cpu/sgx/encl.c | 16 +++++++---------
arch/x86/kernel/cpu/sgx/encl.h | 10 ++++++++--
arch/x86/kernel/cpu/sgx/main.c | 4 ++--
3 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 279148e72459..c0a3c00284c8 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -46,10 +46,10 @@ static int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_ind
* a check if an enclave page sharing the PCMD page is in the process of being
* reclaimed.
*
- * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it
- * intends to reclaim that enclave page - it means that the PCMD page
- * associated with that enclave page is about to get some data and thus
- * even if the PCMD page is empty, it should not be truncated.
+ * The reclaimer sets the SGX_ENCL_PAGE_PCMD_BUSY flag when it intends to
+ * reclaim that enclave page - it means that the PCMD page associated with that
+ * enclave page is about to get some data and thus even if the PCMD page is
+ * empty, it should not be truncated.
*
* Context: Enclave mutex (&sgx_encl->lock) must be held.
* Return: 1 if the reclaimer is about to write to the PCMD page
@@ -77,8 +77,7 @@ static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
* Stop when reaching the SECS page - it does not
* have a page_array entry and its reclaim is
* started and completed with enclave mutex held so
- * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED
- * flag.
+ * it does not use the SGX_ENCL_PAGE_PCMD_BUSY flag.
*/
if (addr == encl->base + encl->size)
break;
@@ -91,8 +90,7 @@ static int reclaimer_writing_to_pcmd(struct sgx_encl *encl,
* VA page slot ID uses same bit as the flag so it is important
* to ensure that the page is not already in backing store.
*/
- if (entry->epc_page &&
- (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) {
+ if (entry->epc_page && (entry->desc & SGX_ENCL_PAGE_PCMD_BUSY)) {
reclaimed = 1;
break;
}
@@ -257,7 +255,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
/* Entry successfully located. */
if (entry->epc_page) {
- if (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)
+ if (entry->desc & SGX_ENCL_PAGE_BUSY)
return ERR_PTR(-EBUSY);
return entry;
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index f94ff14c9486..b566b8ad5f33 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -22,8 +22,14 @@
/* 'desc' bits holding the offset in the VA (version array) page. */
#define SGX_ENCL_PAGE_VA_OFFSET_MASK GENMASK_ULL(11, 3)
-/* 'desc' bit marking that the page is being reclaimed. */
-#define SGX_ENCL_PAGE_BEING_RECLAIMED BIT(3)
+/* 'desc' bit indicating that the page is busy (being reclaimed). */
+#define SGX_ENCL_PAGE_BUSY BIT(2)
+
+/*
+ * 'desc' bit indicating that PCMD page associated with the enclave page is
+ * busy (because the enclave page is being reclaimed).
+ */
+#define SGX_ENCL_PAGE_PCMD_BUSY BIT(3)
struct sgx_encl_page {
unsigned long desc;
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 166692f2d501..e94b09c43673 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -204,7 +204,7 @@ static void sgx_encl_ewb(struct sgx_epc_page *epc_page,
void *va_slot;
int ret;
- encl_page->desc &= ~SGX_ENCL_PAGE_BEING_RECLAIMED;
+ encl_page->desc &= ~(SGX_ENCL_PAGE_BUSY | SGX_ENCL_PAGE_PCMD_BUSY);
va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
list);
@@ -340,7 +340,7 @@ static void sgx_reclaim_pages(void)
goto skip;
}
- encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
+ encl_page->desc |= SGX_ENCL_PAGE_BUSY | SGX_ENCL_PAGE_PCMD_BUSY;
mutex_unlock(&encl_page->encl->lock);
continue;
--
2.43.0
From: Willem de Bruijn <willemb(a)google.com>
Tighten csum_start and csum_offset checks in virtio_net_hdr_to_skb
for GSO packets.
The function already checks that a checksum requested with
VIRTIO_NET_HDR_F_NEEDS_CSUM is in the skb linear region. But for GSO
packets this might not hold for the segments after segmentation.
Syzkaller demonstrated reaching this warning in skb_checksum_help:
offset = skb_checksum_start_offset(skb);
ret = -EINVAL;
if (WARN_ON_ONCE(offset >= skb_headlen(skb)))
By injecting a TSO packet:
WARNING: CPU: 1 PID: 3539 at net/core/dev.c:3284 skb_checksum_help+0x3d0/0x5b0
ip_do_fragment+0x209/0x1b20 net/ipv4/ip_output.c:774
ip_finish_output_gso net/ipv4/ip_output.c:279 [inline]
__ip_finish_output+0x2bd/0x4b0 net/ipv4/ip_output.c:301
iptunnel_xmit+0x50c/0x930 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x2296/0x2c70 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x759/0xa60 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4850 [inline]
netdev_start_xmit include/linux/netdevice.h:4864 [inline]
xmit_one net/core/dev.c:3595 [inline]
dev_hard_start_xmit+0x261/0x8c0 net/core/dev.c:3611
__dev_queue_xmit+0x1b97/0x3c90 net/core/dev.c:4261
packet_snd net/packet/af_packet.c:3073 [inline]
The geometry of the bad input packet at tcp_gso_segment:
[ 52.003050][ T8403] skb len=12202 headroom=244 headlen=12093 tailroom=0
[ 52.003050][ T8403] mac=(168,24) mac_len=24 net=(192,52) trans=244
[ 52.003050][ T8403] shinfo(txflags=0 nr_frags=1 gso(size=1552 type=3 segs=0))
[ 52.003050][ T8403] csum(0x60000c7 start=199 offset=1536
ip_summed=3 complete_sw=0 valid=0 level=0)
Mitigate with stricter input validation.
csum_offset: for GSO packets, deduce the correct value from gso_type.
This is already done for USO. Extend it to TSO. Leave UFO as is:
udp[46]_ufo_fragment ignores these fields and always computes the
checksum in software.
csum_start: finding the real offset requires parsing to the transport
header. Do not add a parser; use the existing segmentation parsing. Thanks
to SKB_GSO_DODGY, that also catches bad packets that are hw offloaded.
Again, test both TSO and USO. Do not test UFO, for the above reason, and
do not test UDP tunnel offload.
GSO packets are almost always CHECKSUM_PARTIAL. USO packets may be
CHECKSUM_NONE since commit 10154dbded6d6 ("udp: Allow GSO transmit
from devices with no checksum offload"), but even then these fields
are initialized correctly in udp4_hwcsum/udp6_hwcsum_outgoing, so
there is no need to test for ip_summed == CHECKSUM_PARTIAL first.
This revises an existing fix mentioned in the Fixes tag, which broke
small packets with GSO offload, as detected by kselftests.
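Taken together, the csum_offset side of the validation in
virtio_net_hdr_to_skb() can be sketched as follows (assuming gso_type has
already been derived from hdr->gso_type; the UDP_L4 arm reflects the
pre-existing USO check):

	switch (gso_type) {
	case SKB_GSO_UDP_L4:
		if (skb->csum_offset != offsetof(struct udphdr, check))
			return -EINVAL;
		break;
	case SKB_GSO_TCPV4:
	case SKB_GSO_TCPV6:
		if (skb->csum_offset != offsetof(struct tcphdr, check))
			return -EINVAL;
		break;
	}

csum_start itself is validated later, during segmentation, where
tcp_gso_segment() and __udp_gso_segment() now reject packets whose
csum_start does not line up with the transport header.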
Link: https://syzkaller.appspot.com/bug?extid=e1db31216c789f552871
Link: https://lore.kernel.org/netdev/20240723223109.2196886-1-kuba@kernel.org
Fixes: e269d79c7d35 ("net: missing check virtio")
Cc: stable(a)vger.kernel.org
Signed-off-by: Willem de Bruijn <willemb(a)google.com>
---
include/linux/virtio_net.h | 16 +++++-----------
net/ipv4/tcp_offload.c | 3 +++
net/ipv4/udp_offload.c | 3 +++
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index d1d7825318c32..6c395a2600e8d 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
unsigned int thlen = 0;
unsigned int p_off = 0;
unsigned int ip_proto;
- u64 ret, remainder, gso_size;
if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
@@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset);
u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16));
- if (hdr->gso_size) {
- gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
- ret = div64_u64_rem(skb->len, gso_size, &remainder);
- if (!(ret && (hdr->gso_size > needed) &&
- ((remainder > needed) || (remainder == 0)))) {
- return -EINVAL;
- }
- skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG;
- }
-
if (!pskb_may_pull(skb, needed))
return -EINVAL;
@@ -182,6 +171,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
if (gso_type != SKB_GSO_UDP_L4)
return -EINVAL;
break;
+ case SKB_GSO_TCPV4:
+ case SKB_GSO_TCPV6:
+ if (skb->csum_offset != offsetof(struct tcphdr, check))
+ return -EINVAL;
+ break;
}
/* Kernel has a special handling for GSO_BY_FRAGS. */
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 4b791e74529e1..9e49ffcc77071 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -140,6 +140,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (thlen < sizeof(*th))
goto out;
+ if (unlikely(skb->csum_start != skb->transport_header))
+ goto out;
+
if (!pskb_may_pull(skb, thlen))
goto out;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index aa2e0a28ca613..f521152c40871 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -278,6 +278,9 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (unlikely(gso_skb->csum_start != gso_skb->transport_header))
+ return ERR_PTR(-EINVAL);
+
if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
/* Packet is from an untrusted source, reset gso_segs. */
skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
--
2.46.0.rc1.232.g9752f9e123-goog
Commit e882575efc77 ("spi: rockchip: Suspend and resume the bus during
NOIRQ_SYSTEM_SLEEP_PM ops") stopped respecting runtime PM status and
simply disabled clocks unconditionally when suspending the system. This
causes problems when the device is already runtime suspended when we go
to sleep -- in which case we double-disable clocks and produce a
WARNing.
Switch back to pm_runtime_force_{suspend,resume}(), because that still
seems like the right thing to do, and the aforementioned commit gives no
explanation of why it stopped using it.
Also, refactor some of the resume() error handling, because it's not
actually a good idea to re-disable clocks on failure.
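The resulting suspend ordering can be sketched as follows (mirroring the
diff below; pm_runtime_force_suspend() skips the clock disable when the
device is already runtime-suspended):

	static int rockchip_spi_suspend(struct device *dev)
	{
		struct spi_controller *ctlr = dev_get_drvdata(dev);
		int ret;

		ret = spi_controller_suspend(ctlr);
		if (ret < 0)
			return ret;

		/* respects runtime-PM state; no double clk_disable */
		ret = pm_runtime_force_suspend(dev);
		if (ret < 0) {
			spi_controller_resume(ctlr);
			return ret;
		}

		pinctrl_pm_select_sleep_state(dev);
		return 0;
	}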
Fixes: e882575efc77 ("spi: rockchip: Suspend and resume the bus during NOIRQ_SYSTEM_SLEEP_PM ops")
Cc: <stable(a)vger.kernel.org>
Reported-by: "Ondřej Jirman" <megi(a)xff.cz>
Closes: https://lore.kernel.org/lkml/20220621154218.sau54jeij4bunf56@core/
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
---
Changes in v2:
- fix unused 'rs' warning
drivers/spi/spi-rockchip.c | 23 +++++++----------------
1 file changed, 7 insertions(+), 16 deletions(-)
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index e1ecd96c7858..0bb33c43b1b4 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -945,14 +945,16 @@ static int rockchip_spi_suspend(struct device *dev)
{
int ret;
struct spi_controller *ctlr = dev_get_drvdata(dev);
- struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
ret = spi_controller_suspend(ctlr);
if (ret < 0)
return ret;
- clk_disable_unprepare(rs->spiclk);
- clk_disable_unprepare(rs->apb_pclk);
+ ret = pm_runtime_force_suspend(dev);
+ if (ret < 0) {
+ spi_controller_resume(ctlr);
+ return ret;
+ }
pinctrl_pm_select_sleep_state(dev);
@@ -963,25 +965,14 @@ static int rockchip_spi_resume(struct device *dev)
{
int ret;
struct spi_controller *ctlr = dev_get_drvdata(dev);
- struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
pinctrl_pm_select_default_state(dev);
- ret = clk_prepare_enable(rs->apb_pclk);
+ ret = pm_runtime_force_resume(dev);
if (ret < 0)
return ret;
- ret = clk_prepare_enable(rs->spiclk);
- if (ret < 0)
- clk_disable_unprepare(rs->apb_pclk);
-
- ret = spi_controller_resume(ctlr);
- if (ret < 0) {
- clk_disable_unprepare(rs->spiclk);
- clk_disable_unprepare(rs->apb_pclk);
- }
-
- return 0;
+ return spi_controller_resume(ctlr);
}
#endif /* CONFIG_PM_SLEEP */
--
2.46.0.295.g3b9ea8a38a-goog