The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8bdd9ef7e9b1b2a73e394712b72b22055e0e26c3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2024081222-process-suspect-d983@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8bdd9ef7e9b1 ("drm/i915/gem: Fix Virtual Memory mapping boundaries calculation")
8e4ee5e87ce6 ("drm/i915: Wrap all access to i915_vma.node.start|size")
3bb6a44251b4 ("drm/i915: Rename ggtt_view as gtt_view")
d976521a995a ("drm/i915: extend i915_vma_pin_iomap()")
d63ddca7c581 ("drm/i915: Update tiled blits selftest")
a0ed9c95cce6 ("drm/i915/gt: Use XY_FAST_COLOR_BLT to clear obj on graphics ver 12+")
fd5803e5eebe ("drm/i915/gt: use engine instance directly for offset")
892bfb8a604d ("drm/i915/fbdev: fixup setting screen_size")
c674c5b9342e ("drm/i915/xehp: CCS should use RCS setup functions")
30b9d1b3ef37 ("drm/i915: add I915_BO_ALLOC_GPU_ONLY")
3312a4ac8a46 ("drm/i915/ttm: require mappable by default")
8fbf28934acf ("drm/i915/ttm: fixup the mock_bo")
db927686e43f ("Merge drm/drm-next into drm-intel-gt-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8bdd9ef7e9b1b2a73e394712b72b22055e0e26c3 Mon Sep 17 00:00:00 2001
From: Andi Shyti <andi.shyti@linux.intel.com>
Date: Fri, 2 Aug 2024 10:38:50 +0200
Subject: [PATCH] drm/i915/gem: Fix Virtual Memory mapping boundaries
calculation
Calculating the size of the mapped area as the lesser value
between the requested size and the actual size does not take the
partial mapping offset into account. This can cause out-of-bounds
page fault accesses.
Fix the calculation of the starting and ending addresses; the
total size is now deduced from the difference between the end and
start addresses.
Additionally, the calculations have been rewritten in a clearer
and more understandable form.
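For illustration, the corrected boundary arithmetic can be reproduced in a
standalone user-space sketch (the page counts below are made-up example
inputs and the helper logic is only a paraphrase of set_address_limits(),
not the driver code):

#include <stdio.h>

#define PAGE_SHIFT 12

/* Paraphrase of set_address_limits(): work in page units so no
 * low-order bits are lost, apply both offsets, then clamp to the
 * user's vma before shifting back to byte addresses. */
int main(void)
{
	unsigned long vm_start = 0x7f0000000000UL >> PAGE_SHIFT; /* example mmap */
	unsigned long vm_end = vm_start + 16;	/* mapping is 16 pages long */
	long obj_offset = 4;	/* fake-offset pages into the object */
	long partial_offset = 2;	/* pages where the partial vma starts */
	long vma_size = 8;	/* pages covered by the bound vma */

	long start = (long)vm_start - obj_offset + partial_offset;
	long end = start + vma_size;

	if (start < (long)vm_start)
		start = vm_start;
	if (end > (long)vm_end)
		end = vm_end;

	printf("remap [%#lx, %#lx), %ld bytes\n",
	       (unsigned long)start << PAGE_SHIFT,
	       (unsigned long)end << PAGE_SHIFT,
	       (end - start) << PAGE_SHIFT);
	return 0;
}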
Fixes: c58305af1835 ("drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass")
Reported-by: Jann Horn <jannh@google.com>
Co-developed-by: Chris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: Chris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: <stable@vger.kernel.org> # v4.9+
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Jonathan Cavitt <Jonathan.cavitt@intel.com>
[Joonas: Add Requires: tag]
Requires: 60a2066c5005 ("drm/i915/gem: Adjust vma offset for framebuffer mmap offset")
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240802083850.103694-3-andi.…
(cherry picked from commit 97b6784753da06d9d40232328efc5c5367e53417)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index ce10dd259812..cac6d4184506 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -290,6 +290,41 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
return i915_error_to_vmf_fault(err);
}
+static void set_address_limits(struct vm_area_struct *area,
+ struct i915_vma *vma,
+ unsigned long obj_offset,
+ unsigned long *start_vaddr,
+ unsigned long *end_vaddr)
+{
+ unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */
+ long start, end; /* memory boundaries */
+
+ /*
+ * Let's move into the ">> PAGE_SHIFT"
+ * domain to be sure not to lose bits
+ */
+ vm_start = area->vm_start >> PAGE_SHIFT;
+ vm_end = area->vm_end >> PAGE_SHIFT;
+ vma_size = vma->size >> PAGE_SHIFT;
+
+ /*
+ * Calculate the memory boundaries by considering the offset
+ * provided by the user during memory mapping and the offset
+ * provided for the partial mapping.
+ */
+ start = vm_start;
+ start -= obj_offset;
+ start += vma->gtt_view.partial.offset;
+ end = start + vma_size;
+
+ start = max_t(long, start, vm_start);
+ end = min_t(long, end, vm_end);
+
+ /* Let's move back into the "<< PAGE_SHIFT" domain */
+ *start_vaddr = (unsigned long)start << PAGE_SHIFT;
+ *end_vaddr = (unsigned long)end << PAGE_SHIFT;
+}
+
static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
@@ -302,14 +337,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
bool write = area->vm_flags & VM_WRITE;
struct i915_gem_ww_ctx ww;
+ unsigned long obj_offset;
+ unsigned long start, end; /* memory boundaries */
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
+ unsigned long pfn;
int srcu;
int ret;
- /* We don't use vmf->pgoff since that has the fake offset */
+ obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+ page_offset += obj_offset;
trace_i915_gem_object_fault(obj, page_offset, true, write);
@@ -402,12 +441,14 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
if (ret)
goto err_unpin;
+ set_address_limits(area, vma, obj_offset, &start, &end);
+
+ pfn = (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT;
+ pfn += (start - area->vm_start) >> PAGE_SHIFT;
+ pfn += obj_offset - vma->gtt_view.partial.offset;
+
/* Finally, remap it using the new GTT offset */
- ret = remap_io_mapping(area,
- area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT),
- (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT,
- min_t(u64, vma->size, area->vm_end - area->vm_start),
- &ggtt->iomap);
+ ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap);
if (ret)
goto err_fence;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 89add40066f9ed9abe5f7f886fe5789ff7e0c50e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2024081322-yoyo-proving-7a44@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
89add40066f9 ("net: drop bad gso csum_start and offset in virtio_net_hdr")
fc8b2a619469 ("net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation")
9840036786d9 ("gso: fix dodgy bit handling for GSO_UDP_L4")
cf9acc90c80e ("net: virtio_net_hdr_to_skb: count transport header in UFO")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 89add40066f9ed9abe5f7f886fe5789ff7e0c50e Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 29 Jul 2024 16:10:12 -0400
Subject: [PATCH] net: drop bad gso csum_start and offset in virtio_net_hdr
Tighten csum_start and csum_offset checks in virtio_net_hdr_to_skb
for GSO packets.
The function already checks that a checksum requested with
VIRTIO_NET_HDR_F_NEEDS_CSUM is in skb linear. But for GSO packets
this might not hold for segs after segmentation.
Syzkaller demonstrated that this warning in skb_checksum_help can be reached:
offset = skb_checksum_start_offset(skb);
ret = -EINVAL;
if (WARN_ON_ONCE(offset >= skb_headlen(skb)))
By injecting a TSO packet:
WARNING: CPU: 1 PID: 3539 at net/core/dev.c:3284 skb_checksum_help+0x3d0/0x5b0
ip_do_fragment+0x209/0x1b20 net/ipv4/ip_output.c:774
ip_finish_output_gso net/ipv4/ip_output.c:279 [inline]
__ip_finish_output+0x2bd/0x4b0 net/ipv4/ip_output.c:301
iptunnel_xmit+0x50c/0x930 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x2296/0x2c70 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x759/0xa60 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4850 [inline]
netdev_start_xmit include/linux/netdevice.h:4864 [inline]
xmit_one net/core/dev.c:3595 [inline]
dev_hard_start_xmit+0x261/0x8c0 net/core/dev.c:3611
__dev_queue_xmit+0x1b97/0x3c90 net/core/dev.c:4261
packet_snd net/packet/af_packet.c:3073 [inline]
The geometry of the bad input packet at tcp_gso_segment:
[ 52.003050][ T8403] skb len=12202 headroom=244 headlen=12093 tailroom=0
[ 52.003050][ T8403] mac=(168,24) mac_len=24 net=(192,52) trans=244
[ 52.003050][ T8403] shinfo(txflags=0 nr_frags=1 gso(size=1552 type=3 segs=0))
[ 52.003050][ T8403] csum(0x60000c7 start=199 offset=1536
ip_summed=3 complete_sw=0 valid=0 level=0)
Mitigate with stricter input validation.
csum_offset: for GSO packets, deduce the correct value from gso_type.
This is already done for USO. Extend it to TSO. Let UFO be:
udp[46]_ufo_fragment ignores these fields and always computes the
checksum in software.
csum_start: finding the real offset requires parsing to the transport
header. Do not add a parser, use existing segmentation parsing. Thanks
to SKB_GSO_DODGY, that also catches bad packets that are hw offloaded.
Again test both TSO and USO. Do not test UFO for the above reason, and
do not test UDP tunnel offload.
GSO packets are almost always CHECKSUM_PARTIAL. USO packets may be
CHECKSUM_NONE since commit 10154dbded6d6 ("udp: Allow GSO transmit
from devices with no checksum offload"), but then still these fields
are initialized correctly in udp4_hwcsum/udp6_hwcsum_outgoing. So no
need to test for ip_summed == CHECKSUM_PARTIAL first.
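For a sense of what the tightened validation expects from user space, here
is a hedged sketch of a virtio_net_hdr set up for a TSO/IPv4 packet
(example values only; assumes a little-endian host, where __virtio16 needs
no byte swapping):

#include <stddef.h>
#include <stdint.h>
#include <linux/tcp.h>
#include <linux/virtio_net.h>

/* Sketch only: fill a virtio_net_hdr that satisfies the stricter
 * checks for a TSO/IPv4 packet written to a tap fd. Offsets are
 * relative to the start of the Ethernet frame. */
static void fill_tso_hdr(struct virtio_net_hdr *h,
			 uint16_t l2_l3_len, uint16_t mss)
{
	h->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
	h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
	h->gso_size = mss;
	/* csum_start must point at the transport header; the new
	 * skb_checksum_start() check in tcp_gso_segment() rejects
	 * anything else. */
	h->csum_start = l2_l3_len;
	/* csum_offset must be offsetof(struct tcphdr, check); this is
	 * what the new SKB_GSO_TCPV4/V6 case enforces. */
	h->csum_offset = offsetof(struct tcphdr, check);
	h->hdr_len = l2_l3_len + sizeof(struct tcphdr);
}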
This revises an existing fix mentioned in the Fixes tag, which broke
small packets with GSO offload, as detected by kselftests.
Link: https://syzkaller.appspot.com/bug?extid=e1db31216c789f552871
Link: https://lore.kernel.org/netdev/20240723223109.2196886-1-kuba@kernel.org
Fixes: e269d79c7d35 ("net: missing check virtio")
Cc: stable@vger.kernel.org
Signed-off-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/20240729201108.1615114-1-willemdebruijn.kernel@gma…
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index d1d7825318c3..6c395a2600e8 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
unsigned int thlen = 0;
unsigned int p_off = 0;
unsigned int ip_proto;
- u64 ret, remainder, gso_size;
if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
@@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset);
u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16));
- if (hdr->gso_size) {
- gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
- ret = div64_u64_rem(skb->len, gso_size, &remainder);
- if (!(ret && (hdr->gso_size > needed) &&
- ((remainder > needed) || (remainder == 0)))) {
- return -EINVAL;
- }
- skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG;
- }
-
if (!pskb_may_pull(skb, needed))
return -EINVAL;
@@ -182,6 +171,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
if (gso_type != SKB_GSO_UDP_L4)
return -EINVAL;
break;
+ case SKB_GSO_TCPV4:
+ case SKB_GSO_TCPV6:
+ if (skb->csum_offset != offsetof(struct tcphdr, check))
+ return -EINVAL;
+ break;
}
/* Kernel has a special handling for GSO_BY_FRAGS. */
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 4b791e74529e..e4ad3311e148 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -140,6 +140,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (thlen < sizeof(*th))
goto out;
+ if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb)))
+ goto out;
+
if (!pskb_may_pull(skb, thlen))
goto out;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index aa2e0a28ca61..bc8a9da750fe 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -278,6 +278,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (unlikely(skb_checksum_start(gso_skb) !=
+ skb_transport_header(gso_skb)))
+ return ERR_PTR(-EINVAL);
+
if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
/* Packet is from an untrusted source, reset gso_segs. */
skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
The pmd_trans_huge() code in mfill_atomic() is wrong in two different
ways depending on kernel version:
1. The pmd_trans_huge() check is racy and can lead to a BUG_ON() (if you hit
the right two race windows) - I've tested this in a kernel build with
some extra mdelay() calls. See the commit message for a description
of the race scenario.
On older kernels (before 6.5), I think the same bug can even
theoretically lead to accessing transhuge page contents as a page table
if you hit the right 5 narrow race windows (I haven't tested this case).
2. On newer kernels (>=6.5), for shmem mappings, khugepaged is allowed
to yank page tables out from under us (though I haven't tested that),
so I think the BUG_ON() checks in mfill_atomic() are just wrong.
I decided to write two separate fixes for these, so that the first fix
can be backported to kernels affected by the first bug.
Signed-off-by: Jann Horn <jannh@google.com>
---
Jann Horn (2):
userfaultfd: Fix pmd_trans_huge() recheck race
userfaultfd: Don't BUG_ON() if khugepaged yanks our page table
mm/userfaultfd.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
---
base-commit: d4560686726f7a357922f300fc81f5964be8df04
change-id: 20240812-uffd-thp-flip-fix-20f91f1151b9
--
Jann Horn <jannh@google.com>
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 8bdd9ef7e9b1b2a73e394712b72b22055e0e26c3
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2024081221-moisture-vaseline-cb8b@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
8bdd9ef7e9b1 ("drm/i915/gem: Fix Virtual Memory mapping boundaries calculation")
8e4ee5e87ce6 ("drm/i915: Wrap all access to i915_vma.node.start|size")
3bb6a44251b4 ("drm/i915: Rename ggtt_view as gtt_view")
d976521a995a ("drm/i915: extend i915_vma_pin_iomap()")
d63ddca7c581 ("drm/i915: Update tiled blits selftest")
a0ed9c95cce6 ("drm/i915/gt: Use XY_FAST_COLOR_BLT to clear obj on graphics ver 12+")
fd5803e5eebe ("drm/i915/gt: use engine instance directly for offset")
892bfb8a604d ("drm/i915/fbdev: fixup setting screen_size")
c674c5b9342e ("drm/i915/xehp: CCS should use RCS setup functions")
30b9d1b3ef37 ("drm/i915: add I915_BO_ALLOC_GPU_ONLY")
3312a4ac8a46 ("drm/i915/ttm: require mappable by default")
8fbf28934acf ("drm/i915/ttm: fixup the mock_bo")
db927686e43f ("Merge drm/drm-next into drm-intel-gt-next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8bdd9ef7e9b1b2a73e394712b72b22055e0e26c3 Mon Sep 17 00:00:00 2001
From: Andi Shyti <andi.shyti@linux.intel.com>
Date: Fri, 2 Aug 2024 10:38:50 +0200
Subject: [PATCH] drm/i915/gem: Fix Virtual Memory mapping boundaries
calculation
Calculating the size of the mapped area as the lesser value
between the requested size and the actual size does not take the
partial mapping offset into account. This can cause out-of-bounds
page fault accesses.
Fix the calculation of the starting and ending addresses; the
total size is now deduced from the difference between the end and
start addresses.
Additionally, the calculations have been rewritten in a clearer
and more understandable form.
Fixes: c58305af1835 ("drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass")
Reported-by: Jann Horn <jannh@google.com>
Co-developed-by: Chris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: Chris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: <stable@vger.kernel.org> # v4.9+
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Jonathan Cavitt <Jonathan.cavitt@intel.com>
[Joonas: Add Requires: tag]
Requires: 60a2066c5005 ("drm/i915/gem: Adjust vma offset for framebuffer mmap offset")
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240802083850.103694-3-andi.…
(cherry picked from commit 97b6784753da06d9d40232328efc5c5367e53417)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index ce10dd259812..cac6d4184506 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -290,6 +290,41 @@ static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
return i915_error_to_vmf_fault(err);
}
+static void set_address_limits(struct vm_area_struct *area,
+ struct i915_vma *vma,
+ unsigned long obj_offset,
+ unsigned long *start_vaddr,
+ unsigned long *end_vaddr)
+{
+ unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */
+ long start, end; /* memory boundaries */
+
+ /*
+ * Let's move into the ">> PAGE_SHIFT"
+ * domain to be sure not to lose bits
+ */
+ vm_start = area->vm_start >> PAGE_SHIFT;
+ vm_end = area->vm_end >> PAGE_SHIFT;
+ vma_size = vma->size >> PAGE_SHIFT;
+
+ /*
+ * Calculate the memory boundaries by considering the offset
+ * provided by the user during memory mapping and the offset
+ * provided for the partial mapping.
+ */
+ start = vm_start;
+ start -= obj_offset;
+ start += vma->gtt_view.partial.offset;
+ end = start + vma_size;
+
+ start = max_t(long, start, vm_start);
+ end = min_t(long, end, vm_end);
+
+ /* Let's move back into the "<< PAGE_SHIFT" domain */
+ *start_vaddr = (unsigned long)start << PAGE_SHIFT;
+ *end_vaddr = (unsigned long)end << PAGE_SHIFT;
+}
+
static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
{
#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
@@ -302,14 +337,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
bool write = area->vm_flags & VM_WRITE;
struct i915_gem_ww_ctx ww;
+ unsigned long obj_offset;
+ unsigned long start, end; /* memory boundaries */
intel_wakeref_t wakeref;
struct i915_vma *vma;
pgoff_t page_offset;
+ unsigned long pfn;
int srcu;
int ret;
- /* We don't use vmf->pgoff since that has the fake offset */
+ obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node);
page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+ page_offset += obj_offset;
trace_i915_gem_object_fault(obj, page_offset, true, write);
@@ -402,12 +441,14 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
if (ret)
goto err_unpin;
+ set_address_limits(area, vma, obj_offset, &start, &end);
+
+ pfn = (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT;
+ pfn += (start - area->vm_start) >> PAGE_SHIFT;
+ pfn += obj_offset - vma->gtt_view.partial.offset;
+
/* Finally, remap it using the new GTT offset */
- ret = remap_io_mapping(area,
- area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT),
- (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT,
- min_t(u64, vma->size, area->vm_end - area->vm_start),
- &ggtt->iomap);
+ ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap);
if (ret)
goto err_fence;
Sense data can be in either fixed format or descriptor format.
SAT-6 revision 1, 10.4.6 Control mode page, says that if the D_SENSE bit
is set to zero (i.e., fixed format sense data), then the SATL should
return fixed format sense data for ATA PASS-THROUGH commands.
A lot of user space programs incorrectly assume that the sense data is in
descriptor format, without checking the RESPONSE CODE field of the
returned sense data (to see which format the sense data is in).
The libata SATL has always kept D_SENSE set to zero by default.
(It is however possible to change the value using a MODE SELECT command.)
For failed ATA PASS-THROUGH commands, we correctly generated sense data
according to the D_SENSE bit. However, because of a bug, sense data for
successful ATA PASS-THROUGH commands was always generated in the
descriptor format.
This was fixed to consistently respect D_SENSE for both failed and
successful ATA PASS-THROUGH commands in commit 28ab9769117c ("ata:
libata-scsi: Honor the D_SENSE bit for CK_COND=1 and no error").
After commit 28ab9769117c ("ata: libata-scsi: Honor the D_SENSE bit for
CK_COND=1 and no error"), we started receiving bug reports that we broke
these user space programs (these user space programs must never have
encountered a failing command, as the sense data for failing commands has
always correctly respected D_SENSE, which by default meant fixed format).
Since a lot of user space programs seem to assume that the sense data is
in descriptor format (without checking the type), let's simply change the
default so that D_SENSE is set to one.
That way:
- Broken user space programs will see no regression.
- Both failed and successful ATA PASS-THROUGH commands will respect
D_SENSE, as per SAT-6 revision 1.
- It is apparently far more common for user space applications to assume
that the sense data is in descriptor format rather than fixed format.
(A user space program should of course support both, and check the
RESPONSE CODE field to see which format the returned sense data is in.)
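Such a check is straightforward; a minimal user-space sketch based on the
SPC sense-data response codes (an illustration, not part of this patch):

#include <stdbool.h>
#include <stdint.h>

/* Minimal sketch: per SPC, the RESPONSE CODE field (byte 0, bits 6:0)
 * is 0x70/0x71 for fixed format sense data and 0x72/0x73 for
 * descriptor format. Bit 7 of byte 0 is the VALID bit in fixed
 * format and must be masked off before comparing. */
static bool sense_is_descriptor_format(const uint8_t *sense)
{
	uint8_t response_code = sense[0] & 0x7f;

	return response_code == 0x72 || response_code == 0x73;
}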
Cc: stable@vger.kernel.org # 4.19+
Reported-by: Stephan Eisvogel <eisvogel@seitics.de>
Reported-by: Christian Heusel <christian@heusel.eu>
Closes: https://lore.kernel.org/linux-ide/0bf3f2f0-0fc6-4ba5-a420-c0874ef82d64@heus…
Fixes: 28ab9769117c ("ata: libata-scsi: Honor the D_SENSE bit for CK_COND=1 and no error")
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
drivers/ata/libata-core.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index c7752dc80028..590bebe1354d 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5368,6 +5368,13 @@ void ata_dev_init(struct ata_device *dev)
*/
spin_lock_irqsave(ap->lock, flags);
dev->flags &= ~ATA_DFLAG_INIT_MASK;
+
+ /*
+ * A lot of user space programs incorrectly assume that the sense data
+ * is in descriptor format, without checking the RESPONSE CODE field of
+ * the returned sense data (to see which format the sense data is in).
+ */
+ dev->flags |= ATA_DFLAG_D_SENSE;
dev->horkage = 0;
spin_unlock_irqrestore(ap->lock, flags);
--
2.46.0
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 0ecc5be200c84e67114f3640064ba2bae3ba2f5a
Gitweb: https://git.kernel.org/tip/0ecc5be200c84e67114f3640064ba2bae3ba2f5a
Author: Yuntao Wang <yuntao.wang@linux.dev>
AuthorDate: Tue, 13 Aug 2024 09:48:27 +08:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 13 Aug 2024 15:15:19 +02:00
x86/apic: Make x2apic_disable() work correctly
x2apic_disable() clears x2apic_state and x2apic_mode unconditionally, even
when the state is X2APIC_ON_LOCKED, which prevents the kernel from disabling
it, thereby creating inconsistent state.
Due to the early state check for X2APIC_ON, the code path which warns about
a locked X2APIC cannot be reached.
Test for state < X2APIC_ON instead and move the clearing of the state and
mode variables to the place which actually disables X2APIC.
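For reference, the test relies on the ordering of the state enum in apic.c,
which (paraphrased here; see the kernel source for the authoritative
definition) places every enabled state at or above X2APIC_ON:

/* Paraphrased from arch/x86/kernel/apic/apic.c; the ordering is what
 * makes the "x2apic_state < X2APIC_ON" early return cover exactly the
 * OFF and DISABLED states while letting ON and ON_LOCKED proceed. */
enum {
	X2APIC_OFF,
	X2APIC_DISABLED,
	/* All states below here have X2APIC enabled */
	X2APIC_ON,
	X2APIC_ON_LOCKED
};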
[ tglx: Massaged change log. Added Fixes tag. Moved clearing so it's at the
right place for backports ]
Fixes: a57e456a7b28 ("x86/apic: Fix fallout from x2apic cleanup")
Signed-off-by: Yuntao Wang <yuntao.wang@linux.dev>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20240813014827.895381-1-yuntao.wang@linux.dev
---
arch/x86/kernel/apic/apic.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 66fd4b2..3736386 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1775,12 +1775,9 @@ static __init void apic_set_fixmap(bool read_apic);
static __init void x2apic_disable(void)
{
- u32 x2apic_id, state = x2apic_state;
+ u32 x2apic_id;
- x2apic_mode = 0;
- x2apic_state = X2APIC_DISABLED;
-
- if (state != X2APIC_ON)
+ if (x2apic_state < X2APIC_ON)
return;
x2apic_id = read_apic_id();
@@ -1793,6 +1790,10 @@ static __init void x2apic_disable(void)
}
__x2apic_disable();
+
+ x2apic_mode = 0;
+ x2apic_state = X2APIC_DISABLED;
+
/*
* Don't reread the APIC ID as it was already done from
* check_x2apic() and the APIC driver still is a x2APIC variant,
DW_HDMA_V0_LIE and DW_HDMA_V0_RIE are defined as BIT(3) and BIT(4)
respectively in the dw_hdma_control enum. But per the HDMA registers these
bits correspond to the LWIE and RWIE bits, i.e. local watermark interrupt
enable and remote watermark interrupt enable. In linked-list mode the LWIE
and RWIE bits only enable the local and remote watermark interrupts.
Since the watermark interrupts are enabled but never used, spurious
interrupts get generated. So remove the code that enables them, to avoid
generating spurious watermark interrupts.
Also rename DW_HDMA_V0_LIE to DW_HDMA_V0_LWIE and DW_HDMA_V0_RIE to
DW_HDMA_V0_RWIE, since HDMA has no LIE and RIE bits and those bits
correspond to LWIE and RWIE.
Fixes: e74c39573d35 ("dmaengine: dw-edma: Add support for native HDMA")
Cc: stable@vger.kernel.org
Signed-off-by: Mrinmay Sarkar <quic_msarkar@quicinc.com>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
---
drivers/dma/dw-edma/dw-hdma-v0-core.c | 17 +++--------------
1 file changed, 3 insertions(+), 14 deletions(-)
diff --git a/drivers/dma/dw-edma/dw-hdma-v0-core.c b/drivers/dma/dw-edma/dw-hdma-v0-core.c
index a0aabdd..c4c73c4 100644
--- a/drivers/dma/dw-edma/dw-hdma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-hdma-v0-core.c
@@ -17,8 +17,8 @@ enum dw_hdma_control {
DW_HDMA_V0_CB = BIT(0),
DW_HDMA_V0_TCB = BIT(1),
DW_HDMA_V0_LLP = BIT(2),
- DW_HDMA_V0_LIE = BIT(3),
- DW_HDMA_V0_RIE = BIT(4),
+ DW_HDMA_V0_LWIE = BIT(3),
+ DW_HDMA_V0_RWIE = BIT(4),
DW_HDMA_V0_CCS = BIT(8),
DW_HDMA_V0_LLE = BIT(9),
};
@@ -195,25 +195,14 @@ static void dw_hdma_v0_write_ll_link(struct dw_edma_chunk *chunk,
static void dw_hdma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
{
struct dw_edma_burst *child;
- struct dw_edma_chan *chan = chunk->chan;
u32 control = 0, i = 0;
- int j;
if (chunk->cb)
control = DW_HDMA_V0_CB;
- j = chunk->bursts_alloc;
- list_for_each_entry(child, &chunk->burst->list, list) {
- j--;
- if (!j) {
- control |= DW_HDMA_V0_LIE;
- if (!(chan->dw->chip->flags & DW_EDMA_CHIP_LOCAL))
- control |= DW_HDMA_V0_RIE;
- }
-
+ list_for_each_entry(child, &chunk->burst->list, list)
dw_hdma_v0_write_ll_data(chunk, i++, control, child->sz,
child->sar, child->dar);
- }
control = DW_HDMA_V0_LLP | DW_HDMA_V0_TCB;
if (!chunk->cb)
--
2.7.4
From: Mahesh Salgaonkar <mahesh@linux.ibm.com>
[ Upstream commit 0db880fc865ffb522141ced4bfa66c12ab1fbb70 ]
nmi_enter()/nmi_exit() touch per-CPU variables, which can lead to a kernel
crash when invoked during real-mode interrupt handling (e.g. the early
HMI/MCE interrupt handlers) if the percpu allocation comes from the
vmalloc area.
The early HMI/MCE handlers are called through the
DEFINE_INTERRUPT_HANDLER_NMI() wrapper, which invokes the nmi_enter()/
nmi_exit() calls. There is no issue when the percpu allocation comes from
the embedded first chunk. However, with CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
enabled, the percpu allocation can come from the vmalloc area.
With the kernel command line "percpu_alloc=page" we can force the percpu
allocation to come from the vmalloc area and observe a kernel crash in
machine_check_early():
[ 1.215714] NIP [c000000000e49eb4] rcu_nmi_enter+0x24/0x110
[ 1.215717] LR [c0000000000461a0] machine_check_early+0xf0/0x2c0
[ 1.215719] --- interrupt: 200
[ 1.215720] [c000000fffd73180] [0000000000000000] 0x0 (unreliable)
[ 1.215722] [c000000fffd731b0] [0000000000000000] 0x0
[ 1.215724] [c000000fffd73210] [c000000000008364] machine_check_early_common+0x134/0x1f8
Fix this by avoiding use of nmi_enter()/nmi_exit() in real mode if percpu
first chunk is not embedded.
CVE-2024-42126
Cc: stable@vger.kernel.org # 5.10.x
Cc: gregkh@linuxfoundation.org
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Tested-by: Shirisha Ganta <shirisha@linux.ibm.com>
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20240410043006.81577-1-mahesh@linux.ibm.com
[ Conflicts in arch/powerpc/include/asm/interrupt.h
because machine_check_early() and machine_check_exception()
have been refactored. ]
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
v2:
- Also fix the case where CONFIG_PPC_BOOK3S_64 is not enabled.
- Add the Upstream commit reference.
- Cc stable@vger.kernel.org.
---
arch/powerpc/include/asm/percpu.h | 10 ++++++++++
arch/powerpc/kernel/mce.c | 14 +++++++++++---
arch/powerpc/kernel/setup_64.c | 2 ++
arch/powerpc/kernel/traps.c | 8 +++++++-
4 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h
index 8e5b7d0b851c..634970ce13c6 100644
--- a/arch/powerpc/include/asm/percpu.h
+++ b/arch/powerpc/include/asm/percpu.h
@@ -15,6 +15,16 @@
#endif /* CONFIG_SMP */
#endif /* __powerpc64__ */
+#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) && defined(CONFIG_SMP)
+#include <linux/jump_label.h>
+DECLARE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged);
+
+#define percpu_first_chunk_is_paged \
+ (static_key_enabled(&__percpu_first_chunk_is_paged.key))
+#else
+#define percpu_first_chunk_is_paged false
+#endif /* CONFIG_PPC64 && CONFIG_SMP */
+
#include <asm-generic/percpu.h>
#include <asm/paca.h>
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 63702c0badb9..259343040e1b 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -594,8 +594,15 @@ long notrace machine_check_early(struct pt_regs *regs)
u8 ftrace_enabled = this_cpu_get_ftrace_enabled();
this_cpu_set_ftrace_enabled(0);
- /* Do not use nmi_enter/exit for pseries hpte guest */
- if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
+ /*
+ * Do not use nmi_enter/exit for pseries hpte guest
+ *
+ * Likewise, do not use it in real mode if percpu first chunk is not
+ * embedded. With CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK enabled there
+ * are chances where percpu allocation can come from vmalloc area.
+ */
+ if ((radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR)) &&
+ !percpu_first_chunk_is_paged)
nmi_enter();
hv_nmi_check_nonrecoverable(regs);
@@ -606,7 +613,8 @@ long notrace machine_check_early(struct pt_regs *regs)
if (ppc_md.machine_check_early)
handled = ppc_md.machine_check_early(regs);
- if (radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR))
+ if ((radix_enabled() || !firmware_has_feature(FW_FEATURE_LPAR)) &&
+ !percpu_first_chunk_is_paged)
nmi_exit();
this_cpu_set_ftrace_enabled(ftrace_enabled);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 3f8426bccd16..899d87de0165 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -824,6 +824,7 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
+DEFINE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged);
static void __init pcpu_populate_pte(unsigned long addr)
{
@@ -903,6 +904,7 @@ void __init setup_per_cpu_areas(void)
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
+ static_key_enable(&__percpu_first_chunk_is_paged.key);
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) {
__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index b0e87dce2b9a..b4d108bef814 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -835,8 +835,14 @@ void machine_check_exception(struct pt_regs *regs)
* This is silly. The BOOK3S_64 should just call a different function
* rather than expecting semantics to magically change. Something
* like 'non_nmi_machine_check_exception()', perhaps?
+ *
+ * Do not use nmi_enter/exit in real mode if percpu first chunk is
+ * not embedded. With CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK enabled
+ * there are chances where percpu allocation can come from
+ * vmalloc area.
*/
- const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64);
+ const bool nmi = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+ !percpu_first_chunk_is_paged;
if (nmi) nmi_enter();
--
2.34.1