There are several things that were wrong:
- Reading the uffd-wp bit from a swap entry should use pte_swp_uffd_wp()
rather than huge_pte_uffd_wp().
- When copying over a pte, we should drop the uffd-wp bit when
!EVENT_FORK (i.e., when !userfaultfd_wp(dst_vma)).
- When doing early CoW for private hugetlb (e.g. when the parent page was
pinned), the uffd-wp bit should be properly carried over if necessary.
No bug has been reported, probably because most people do not even care
about these corner cases, but they are still bugs and can be exposed by
the recently introduced unit tests, so fix all of them in one shot.
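For reference, the rule can be condensed into a small sketch (hypothetical
helper name; the authoritative changes are in the diff below):

  /* Illustrative sketch only, mirroring the rules listed above. */
  static pte_t huge_pte_fixup_uffd_wp_sketch(struct vm_area_struct *dst_vma,
                                             pte_t entry)
  {
          /* Swap/migration entries keep the uffd-wp bit in swap pte format. */
          bool uffd_wp = is_swap_pte(entry) ? pte_swp_uffd_wp(entry) :
                                              huge_pte_uffd_wp(entry);

          /* Without EVENT_FORK the child vma has no uffd-wp context. */
          if (!userfaultfd_wp(dst_vma) && uffd_wp)
                  entry = huge_pte_clear_uffd_wp(entry);
          return entry;
  }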
Cc: linux-stable <stable(a)vger.kernel.org>
Fixes: bc70fbf269fd ("mm/hugetlb: handle uffd-wp during fork()")
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Signed-off-by: Peter Xu <peterx(a)redhat.com>
---
mm/hugetlb.c | 24 +++++++++++++++---------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f16b25b1a6b9..0213efaf31be 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4953,11 +4953,15 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
static void
hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
- struct folio *new_folio)
+ struct folio *new_folio, pte_t old)
{
+ pte_t newpte = make_huge_pte(vma, &new_folio->page, 1);
+
__folio_mark_uptodate(new_folio);
hugepage_add_new_anon_rmap(new_folio, vma, addr);
- set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, &new_folio->page, 1));
+ if (userfaultfd_wp(vma) && huge_pte_uffd_wp(old))
+ newpte = huge_pte_mkuffd_wp(newpte);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, newpte);
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
folio_set_hugetlb_migratable(new_folio);
}
@@ -5032,14 +5036,12 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
*/
;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
- bool uffd_wp = huge_pte_uffd_wp(entry);
-
- if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_hugetlb_entry_migration(entry))) {
swp_entry_t swp_entry = pte_to_swp_entry(entry);
- bool uffd_wp = huge_pte_uffd_wp(entry);
+ bool uffd_wp = pte_swp_uffd_wp(entry);
if (!is_readable_migration_entry(swp_entry) && cow) {
/*
@@ -5050,10 +5052,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
swp_offset(swp_entry));
entry = swp_entry_to_pte(swp_entry);
if (userfaultfd_wp(src_vma) && uffd_wp)
- entry = huge_pte_mkuffd_wp(entry);
+ entry = pte_swp_mkuffd_wp(entry);
set_huge_pte_at(src, addr, src_pte, entry);
}
- if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_pte_marker(entry))) {
@@ -5114,7 +5116,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
/* huge_ptep of dst_pte won't change as in child */
goto again;
}
- hugetlb_install_folio(dst_vma, dst_pte, addr, new_folio);
+ hugetlb_install_folio(dst_vma, dst_pte, addr,
+ new_folio, src_pte_old);
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
continue;
@@ -5132,6 +5135,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
entry = huge_pte_wrprotect(entry);
}
+ if (!userfaultfd_wp(dst_vma))
+ entry = huge_pte_clear_uffd_wp(entry);
+
set_huge_pte_at(dst, addr, dst_pte, entry);
hugetlb_count_add(npages, dst);
}
--
2.39.1
Several calls to ci_dpm_fini() will attempt to free resources that
either have already been freed or have not been allocated yet. This
may lead to undefined or dangerous behaviour.
For instance, if r600_parse_extended_power_table() fails, it might
call r600_free_extended_power_table(), as will ci_dpm_fini() later
during error handling.
Fix this by only freeing pointers to objects previously allocated.
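The pattern, sketched here with a hypothetical function (the real changes
are in the diff below), is that each error path unwinds only what has been
allocated so far instead of calling the blanket teardown ci_dpm_fini():

  /* Hypothetical sketch of the unwind pattern; not the actual driver code.
   * Assumes rdev->pm.dpm.ps has already been allocated by the caller.
   */
  static int parse_power_table_sketch(struct radeon_device *rdev, int count)
  {
          int i, ret;

          for (i = 0; i < count; i++) {
                  struct ci_ps *ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL);

                  if (!ps) {
                          ret = -ENOMEM;
                          goto err_free_ps;       /* frees entries 0..i-1 only */
                  }
                  rdev->pm.dpm.ps[i].ps_priv = ps;
                  rdev->pm.dpm.num_ps = i + 1;
          }
          return 0;

  err_free_ps:
          for (i = 0; i < rdev->pm.dpm.num_ps; i++)
                  kfree(rdev->pm.dpm.ps[i].ps_priv);
          kfree(rdev->pm.dpm.ps);
          return ret;
  }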
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: cc8dbbb4f62a ("drm/radeon: add dpm support for CI dGPUs (v2)")
Cc: stable(a)vger.kernel.org
Co-developed-by: Natalia Petrova <n.petrova(a)fintech.ru>
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
---
v2: free only previously allocated resources, do not remove ci_dpm_fini()
or other deallocating calls altogether; fix commit message.
v1: https://lore.kernel.org/all/20230403182808.8699-1-n.zhandarovich@fintech.ru/
drivers/gpu/drm/radeon/ci_dpm.c | 28 ++++++++++++++++++++--------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 8ef25ab305ae..b8f4dac68d85 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -5517,6 +5517,7 @@ static int ci_parse_power_table(struct radeon_device *rdev)
u8 frev, crev;
u8 *power_state_offset;
struct ci_ps *ps;
+ int ret;
if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
&frev, &crev, &data_offset))
@@ -5546,11 +5547,15 @@ static int ci_parse_power_table(struct radeon_device *rdev)
non_clock_array_index = power_state->v2.nonClockInfoIndex;
non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
&non_clock_info_array->nonClockInfo[non_clock_array_index];
- if (!rdev->pm.power_state[i].clock_info)
- return -EINVAL;
+ if (!rdev->pm.power_state[i].clock_info) {
+ ret = -EINVAL;
+ goto err_free_ps;
+ }
ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL);
- if (ps == NULL)
- return -ENOMEM;
+ if (ps == NULL) {
+ ret = -ENOMEM;
+ goto err_free_ps;
+ }
rdev->pm.dpm.ps[i].ps_priv = ps;
ci_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i],
non_clock_info,
@@ -5590,6 +5595,12 @@ static int ci_parse_power_table(struct radeon_device *rdev)
}
return 0;
+
+err_free_ps:
+ for (i = 0; i < rdev->pm.dpm.num_ps; i++)
+ kfree(rdev->pm.dpm.ps[i].ps_priv);
+ kfree(rdev->pm.dpm.ps);
+ return ret;
}
static int ci_get_vbios_boot_values(struct radeon_device *rdev,
@@ -5678,25 +5689,26 @@ int ci_dpm_init(struct radeon_device *rdev)
ret = ci_get_vbios_boot_values(rdev, &pi->vbios_boot_state);
if (ret) {
- ci_dpm_fini(rdev);
+ kfree(rdev->pm.dpm.priv);
return ret;
}
ret = r600_get_platform_caps(rdev);
if (ret) {
- ci_dpm_fini(rdev);
+ kfree(rdev->pm.dpm.priv);
return ret;
}
ret = r600_parse_extended_power_table(rdev);
if (ret) {
- ci_dpm_fini(rdev);
+ kfree(rdev->pm.dpm.priv);
return ret;
}
ret = ci_parse_power_table(rdev);
if (ret) {
- ci_dpm_fini(rdev);
+ kfree(rdev->pm.dpm.priv);
+ r600_free_extended_power_table(rdev);
return ret;
}
Smatch detected a double free path because lpfc_nlp_not_used() releases an
ndlp object before reaching the lpfc_nlp_put() at the end of
lpfc_cmpl_els_logo_acc().
Remove the outdated lpfc_nlp_not_used() routine. In
lpfc_mbx_cmpl_ns_reg_login(), replace the call with lpfc_nlp_put(). In
lpfc_cmpl_els_logo_acc(), replace the call with lpfc_unreg_rpi() and keep
the lpfc_nlp_put() at the end of the routine. If the ndlp's RPI was
registered, then lpfc_unreg_rpi()'s completion routine performs the final
ndlp cleanup after lpfc_nlp_put() is called from lpfc_cmpl_els_logo_acc().
Otherwise, if the ndlp has no RPI registered, the lpfc_nlp_put() at the end
of lpfc_cmpl_els_logo_acc() is the final ndlp cleanup.
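Roughly sketched (not the actual driver code), the difference is:

  /* Old, fragile pattern (removed): conditionally drop the reference
   * based on a refcount snapshot ...
   */
  if (kref_read(&ndlp->kref) == 1)
          lpfc_nlp_put(ndlp);             /* may free ndlp right here */
  /* ... while the routine still ends with another put: */
  lpfc_nlp_put(ndlp);                     /* the double free Smatch found */

  /* New pattern: only unregister the RPI when needed and let the single
   * put at the end of lpfc_cmpl_els_logo_acc() drop this path's reference.
   */
  if (ndlp->nlp_flag & NLP_RPI_REGISTERED)
          lpfc_unreg_rpi(vport, ndlp);
  lpfc_nlp_put(ndlp);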
Fixes: 4430f7fd09ec ("scsi: lpfc: Rework locations of ndlp reference taking")
Cc: <stable(a)vger.kernel.org> # v5.11+
Reported-by: Dan Carpenter <error27(a)gmail.com>
Link: https://lore.kernel.org/all/Y3OefhyyJNKH%2Fiaf@kili/
Signed-off-by: Justin Tee <justin.tee(a)broadcom.com>
---
drivers/scsi/lpfc/lpfc_crtn.h | 1 -
drivers/scsi/lpfc/lpfc_els.c | 30 +++++++-----------------------
drivers/scsi/lpfc/lpfc_hbadisc.c | 24 +++---------------------
3 files changed, 10 insertions(+), 45 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index b833b983e69d..0b9edde26abd 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -134,7 +134,6 @@ void lpfc_check_nlp_post_devloss(struct lpfc_vport *vport,
struct lpfc_nodelist *ndlp);
void lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
struct lpfc_iocbq *rspiocb);
-int lpfc_nlp_not_used(struct lpfc_nodelist *ndlp);
struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_vport *, uint32_t);
void lpfc_disc_list_loopmap(struct lpfc_vport *);
void lpfc_disc_start(struct lpfc_vport *);
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 6a15f879e517..a3c8550e9985 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -5205,14 +5205,9 @@ lpfc_els_free_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *elsiocb)
*
* This routine is the completion callback function to the Logout (LOGO)
* Accept (ACC) Response ELS command. This routine is invoked to indicate
- * the completion of the LOGO process. It invokes the lpfc_nlp_not_used() to
- * release the ndlp if it has the last reference remaining (reference count
- * is 1). If succeeded (meaning ndlp released), it sets the iocb ndlp
- * field to NULL to inform the following lpfc_els_free_iocb() routine no
- * ndlp reference count needs to be decremented. Otherwise, the ndlp
- * reference use-count shall be decremented by the lpfc_els_free_iocb()
- * routine. Finally, the lpfc_els_free_iocb() is invoked to release the
- * IOCB data structure.
+ * the completion of the LOGO process. If the node has transitioned to NPR,
+ * this routine unregisters the RPI if it is still registered. The
+ * lpfc_els_free_iocb() is invoked to release the IOCB data structure.
**/
static void
lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
@@ -5253,19 +5248,9 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
(ndlp->nlp_last_elscmd == ELS_CMD_PLOGI))
goto out;
- /* NPort Recovery mode or node is just allocated */
- if (!lpfc_nlp_not_used(ndlp)) {
- /* A LOGO is completing and the node is in NPR state.
- * Just unregister the RPI because the node is still
- * required.
- */
+ if (ndlp->nlp_flag & NLP_RPI_REGISTERED)
lpfc_unreg_rpi(vport, ndlp);
- } else {
- /* Indicate the node has already released, should
- * not reference to it from within lpfc_els_free_iocb.
- */
- cmdiocb->ndlp = NULL;
- }
+
}
out:
/*
@@ -5285,9 +5270,8 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
* RPI (Remote Port Index) mailbox command to the @phba. It simply releases
* the associated lpfc Direct Memory Access (DMA) buffer back to the pool and
* decrements the ndlp reference count held for this completion callback
- * function. After that, it invokes the lpfc_nlp_not_used() to check
- * whether there is only one reference left on the ndlp. If so, it will
- * perform one more decrement and trigger the release of the ndlp.
+ * function. After that, it invokes lpfc_drop_node() to check
+ * whether it is appropriate to release the node.
**/
void
lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 5ba3a9ad9501..67bfdddb897c 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -4333,13 +4333,14 @@ lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
/* If the node is not registered with the scsi or nvme
* transport, remove the fabric node. The failed reg_login
- * is terminal.
+ * is terminal and forces the removal of the last node
+ * reference.
*/
if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) {
spin_lock_irq(&ndlp->lock);
ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
spin_unlock_irq(&ndlp->lock);
- lpfc_nlp_not_used(ndlp);
+ lpfc_nlp_put(ndlp);
}
if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
@@ -6704,25 +6705,6 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp)
return ndlp ? kref_put(&ndlp->kref, lpfc_nlp_release) : 0;
}
-/* This routine free's the specified nodelist if it is not in use
- * by any other discovery thread. This routine returns 1 if the
- * ndlp has been freed. A return value of 0 indicates the ndlp is
- * not yet been released.
- */
-int
-lpfc_nlp_not_used(struct lpfc_nodelist *ndlp)
-{
- lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE,
- "node not used: did:x%x flg:x%x refcnt:x%x",
- ndlp->nlp_DID, ndlp->nlp_flag,
- kref_read(&ndlp->kref));
-
- if (kref_read(&ndlp->kref) == 1)
- if (lpfc_nlp_put(ndlp))
- return 1;
- return 0;
-}
-
/**
* lpfc_fcf_inuse - Check if FCF can be unregistered.
* @phba: Pointer to hba context object.
--
2.38.0
Previously, capability was checked using capable(), which verified that the
caller of the ioctl system call had the required capability. In addition,
the result of the check would be stored in the HCI_SOCK_TRUSTED flag,
making it persistent for the socket.
However, malicious programs can abuse this approach by deliberately sharing
an HCI socket with a privileged task. The HCI socket will then be marked as
trusted whenever the privileged task happens to make an ioctl call on it.
This problem can be solved by using sk_capable() to check capability, which
ensures that not only the current task but also the socket opener has the
specified capability, thus reducing the risk of privilege escalation
through the previously identified vulnerability.
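The difference can be sketched as follows (hypothetical wrapper name; see
net/core/sock.c for the real sk_capable() implementation):

  /* capable(CAP_NET_ADMIN) checks the current task alone, which can be a
   * privileged task tricked into the ioctl.  sk_capable() additionally
   * requires the capability from the credentials the socket was opened
   * with, so a shared socket cannot be promoted to trusted this way.
   */
  static bool hci_sock_may_be_trusted(struct sock *sk)
  {
          return sk_capable(sk, CAP_NET_ADMIN);
  }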
Cc: stable(a)vger.kernel.org
Fixes: f81f5b2db869 ("Bluetooth: Send control open and close messages for HCI raw sockets")
Signed-off-by: Ruihan Li <lrh2000(a)pku.edu.cn>
---
net/bluetooth/hci_sock.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 065812232..f597fe0db 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1003,7 +1003,14 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
if (hci_sock_gen_cookie(sk)) {
struct sk_buff *skb;
- if (capable(CAP_NET_ADMIN))
+ /* Perform careful checks before setting the HCI_SOCK_TRUSTED
+ * flag. Make sure that not only the current task but also
+ * the socket opener has the required capability, since
+ * privileged programs can be tricked into making ioctl calls
+ * on HCI sockets, and the socket should not be marked as
+ * trusted simply because the ioctl caller is privileged.
+ */
+ if (sk_capable(sk, CAP_NET_ADMIN))
hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
/* Send event to monitor */
--
2.40.0
commit ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 upstream.
After a successful cpuset_can_attach() call which increments the
attach_in_progress flag, either cpuset_cancel_attach() or cpuset_attach()
will be called later. In cpuset_attach(), tasks in cpuset_attach_wq,
if present, will be woken up at the end. That is not the case in
cpuset_cancel_attach(). So a missed wakeup is possible if the attach
operation is somehow cancelled. Fix that by doing the wakeup in
cpuset_cancel_attach() as well.
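Roughly, the waiter/waker pairing that this completes is (simplified
sketch; the wait side already exists in the cpuset hotplug path):

  /* Waiter (simplified): */
  wait_event(cpuset_attach_wq, cs->attach_in_progress == 0);

  /* Waker -- must run in BOTH cpuset_attach() and cpuset_cancel_attach(): */
  cs->attach_in_progress--;
  if (!cs->attach_in_progress)
          wake_up(&cpuset_attach_wq);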
Fixes: e44193d39e8d ("cpuset: let hotplug propagation work wait for task attaching")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Reviewed-by: Michal Koutný <mkoutny(a)suse.com>
Cc: stable(a)vger.kernel.org # v3.11+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
---
kernel/cgroup/cpuset.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 0b82e3857d33..20c1ccc6a0bb 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1508,7 +1508,9 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
cs = css_cs(css);
mutex_lock(&cpuset_mutex);
- css_cs(css)->attach_in_progress--;
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
mutex_unlock(&cpuset_mutex);
}
--
2.31.1
commit ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 upstream.
After a successful cpuset_can_attach() call which increments the
attach_in_progress flag, either cpuset_cancel_attach() or cpuset_attach()
will be called later. In cpuset_attach(), tasks in cpuset_attach_wq,
if present, will be woken up at the end. That is not the case in
cpuset_cancel_attach(). So a missed wakeup is possible if the attach
operation is somehow cancelled. Fix that by doing the wakeup in
cpuset_cancel_attach() as well.
Fixes: e44193d39e8d ("cpuset: let hotplug propagation work wait for task attaching")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Reviewed-by: Michal Koutný <mkoutny(a)suse.com>
Cc: stable(a)vger.kernel.org # v3.11+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
---
kernel/cgroup/cpuset.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index c6d412cebc43..3067d3e5a51d 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1504,7 +1504,9 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
cs = css_cs(css);
mutex_lock(&cpuset_mutex);
- css_cs(css)->attach_in_progress--;
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
mutex_unlock(&cpuset_mutex);
}
--
2.31.1
Hi,
please pick
1e020e1b96afd ("ubi: Fix failure attaching when vid_hdr offset equals to (sub)page size")
from linux-next to stable trees.
The commit fixes a problem with another recent commit
1b42b1a36fc94 ("ubi: ensure that VID header offset ... size")
which has already made it to stable trees and thereby breaks
attaching UBI, rendering devices unbootable.
As a temporary fix I have applied this patch downstream in OpenWrt.
Thank you!
Best regards
Daniel
----- Forwarded message from Daniel Golle <daniel(a)makrotopia.org> -----
Date: Sat, 15 Apr 2023 00:12:46 +0100
From: Daniel Golle <daniel(a)makrotopia.org>
To: Richard Weinberger <richard(a)nod.at>
Cc: Miquel Raynal <miquel.raynal(a)bootlin.com>, chengzhihao1 <chengzhihao1(a)huawei.com>, Nicolas Schichan <nschichan(a)freebox.fr>, George Kennedy <george.kennedy(a)oracle.com>, linux-kernel <linux-kernel(a)vger.kernel.org>, linux-mtd
<linux-mtd(a)lists.infradead.org>, Sascha Hauer <s.hauer(a)pengutronix.de>, yi zhang <yi.zhang(a)huawei.com>
Subject: Re: [PATCH -next v3] ubi: Fix failure attaching when vid_hdr offset equals to (sub)page size
Hi Richard,
On Wed, Mar 29, 2023 at 11:33:40PM +0200, Richard Weinberger wrote:
> ----- Original Mail -----
> > Von: "Miquel Raynal" <miquel.raynal(a)bootlin.com>
> >> Thanks for testing.
> >>
> >> > Tested-by: Nicolas Schichan <nschichan(a)freebox.fr>
> >
> > Same here.
> >
> > Tested-by: Miquel Raynal <miquel.raynal(a)bootlin.com> # v5.10, v4.19
>
> Applied to next, PR will follow soon.
As stable Linux trees are affected I wonder when this will hit
linux-stable, i.e. will it be part of 5.15.108, for example?
Cheers
Daniel
----- End forwarded message -----