From: Zijun Hu <quic_zijuhu(a)quicinc.com>
drivers_probe_store() regards bus_rescan_devices_helper()'s returned
value 0 as success when find driver for a single device user specify
that is wrong since the following 3 failed cases also return 0:
(1) the device is dead
(2) bus fails to match() the device with any its driver
(3) bus fails to probe() the device with any its driver
Fixed by only regarding successfully attaching the device to a driver
as success.
Fixes: b8c5cec23d5c ("Driver core: udev triggered device-<>driver binding")
Cc: stable(a)vger.kernel.org
Signed-off-by: Zijun Hu <quic_zijuhu(a)quicinc.com>
---
drivers/base/bus.c | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index abf090ace833..0a994e63785c 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -40,6 +40,20 @@ static struct kset *bus_kset;
struct driver_attribute driver_attr_##_name = \
__ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)
+/* Bus rescans drivers for a single device */
+static int __must_check bus_rescan_single_device(struct device *dev)
+{
+ int ret;
+
+ if (dev->parent && dev->bus->need_parent_lock)
+ device_lock(dev->parent);
+ ret = device_attach(dev);
+ if (dev->parent && dev->bus->need_parent_lock)
+ device_unlock(dev->parent);
+
+ return ret;
+}
+
static int __must_check bus_rescan_devices_helper(struct device *dev,
void *data);
@@ -311,12 +325,19 @@ static ssize_t drivers_probe_store(const struct bus_type *bus,
{
struct device *dev;
int err = -EINVAL;
+ int res;
dev = bus_find_device_by_name(bus, NULL, buf);
if (!dev)
return -ENODEV;
- if (bus_rescan_devices_helper(dev, NULL) == 0)
+
+ res = bus_rescan_single_device(dev);
+ /* Propagate error code upwards as far as possible */
+ if (res < 0)
+ err = res;
+ else if (res == 1)
err = count;
+
put_device(dev);
return err;
}
---
base-commit: 888f67e621dda5c2804a696524e28d0ca4cf0a80
change-id: 20240826-fix_drivers_probe-88c6a2cc1899
Best regards,
--
Zijun Hu <quic_zijuhu(a)quicinc.com>
This changes the judgement of if needing to round up the width or not,
from using the `dp_flow` to the plane's type.
The `dp_flow` can be -22(-EINVAL) even the plane is a PRIMARY one.
See `client_reg[]` in `ipu-common.c`.
[ 0.605141] [drm:ipu_plane_init] channel 28, dp flow -22, possible_crtcs=0x0
Per the commit message in commit: 71f9fd5bcf09, using the plane type for
judging if rounding up is needed is correct.
Fixes: 71f9fd5bcf09 ("drm/imx: ipuv3-plane: Fix overlay plane width")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paul Pu <hui.pu(a)gehealthcare.com>
---
drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c
index 704c549750f9..cee83ac70ada 100644
--- a/drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c
@@ -614,7 +614,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
break;
}
- if (ipu_plane->dp_flow == IPU_DP_FLOW_SYNC_BG)
+ if (ipu_plane->base.type == DRM_PLANE_TYPE_PRIMARY)
width = ipu_src_rect_width(new_state);
else
width = drm_rect_width(&new_state->src) >> 16;
base-commit: 431c1646e1f86b949fa3685efc50b660a364c2b6
--
2.39.2
From: Breno Leitao <leitao(a)debian.org>
[ Upstream commit f8321fa75102246d7415a6af441872f6637c93ab ]
After the commit bdacf3e34945 ("net: Use nested-BH locking for
napi_alloc_cache.") was merged, the following warning began to appear:
WARNING: CPU: 5 PID: 1 at net/core/skbuff.c:1451 napi_skb_cache_put+0x82/0x4b0
__warn+0x12f/0x340
napi_skb_cache_put+0x82/0x4b0
napi_skb_cache_put+0x82/0x4b0
report_bug+0x165/0x370
handle_bug+0x3d/0x80
exc_invalid_op+0x1a/0x50
asm_exc_invalid_op+0x1a/0x20
__free_old_xmit+0x1c8/0x510
napi_skb_cache_put+0x82/0x4b0
__free_old_xmit+0x1c8/0x510
__free_old_xmit+0x1c8/0x510
__pfx___free_old_xmit+0x10/0x10
The issue arises because virtio is assuming it's running in NAPI context
even when it's not, such as in the netpoll case.
To resolve this, modify virtnet_poll_tx() to only set NAPI when budget
is available. Same for virtnet_poll_cleantx(), which always assumed that
it was in a NAPI context.
Fixes: df133f3f9625 ("virtio_net: bulk free tx skbs")
Suggested-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Reviewed-by: Jakub Kicinski <kuba(a)kernel.org>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Jason Wang <jasowang(a)redhat.com>
Reviewed-by: Heng Qi <hengqi(a)linux.alibaba.com>
Link: https://patch.msgid.link/20240712115325.54175-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
[Shivani: Modified to apply on v5.15.y]
Signed-off-by: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
---
drivers/net/virtio_net.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index bd0cb3a03b7b..d8138ad4f865 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1548,7 +1548,7 @@ static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
return false;
}
-static void virtnet_poll_cleantx(struct receive_queue *rq)
+static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
unsigned int index = vq2rxq(rq->vq);
@@ -1561,7 +1561,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
if (__netif_tx_trylock(txq)) {
do {
virtqueue_disable_cb(sq->vq);
- free_old_xmit_skbs(sq, true);
+ free_old_xmit_skbs(sq, !!budget);
} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
@@ -1580,7 +1580,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
unsigned int received;
unsigned int xdp_xmit = 0;
- virtnet_poll_cleantx(rq);
+ virtnet_poll_cleantx(rq, budget);
received = virtnet_receive(rq, budget, &xdp_xmit);
@@ -1683,7 +1683,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
- free_old_xmit_skbs(sq, true);
+ free_old_xmit_skbs(sq, !!budget);
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);
--
2.39.4
From: Breno Leitao <leitao(a)debian.org>
[ Upstream commit f8321fa75102246d7415a6af441872f6637c93ab ]
After the commit bdacf3e34945 ("net: Use nested-BH locking for
napi_alloc_cache.") was merged, the following warning began to appear:
WARNING: CPU: 5 PID: 1 at net/core/skbuff.c:1451 napi_skb_cache_put+0x82/0x4b0
__warn+0x12f/0x340
napi_skb_cache_put+0x82/0x4b0
napi_skb_cache_put+0x82/0x4b0
report_bug+0x165/0x370
handle_bug+0x3d/0x80
exc_invalid_op+0x1a/0x50
asm_exc_invalid_op+0x1a/0x20
__free_old_xmit+0x1c8/0x510
napi_skb_cache_put+0x82/0x4b0
__free_old_xmit+0x1c8/0x510
__free_old_xmit+0x1c8/0x510
__pfx___free_old_xmit+0x10/0x10
The issue arises because virtio is assuming it's running in NAPI context
even when it's not, such as in the netpoll case.
To resolve this, modify virtnet_poll_tx() to only set NAPI when budget
is available. Same for virtnet_poll_cleantx(), which always assumed that
it was in a NAPI context.
Fixes: df133f3f9625 ("virtio_net: bulk free tx skbs")
Suggested-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Reviewed-by: Jakub Kicinski <kuba(a)kernel.org>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Jason Wang <jasowang(a)redhat.com>
Reviewed-by: Heng Qi <hengqi(a)linux.alibaba.com>
Link: https://patch.msgid.link/20240712115325.54175-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
[Shivani: Modified to apply on v6.1.y]
Signed-off-by: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
---
drivers/net/virtio_net.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 61cc0ed1ddc1..e3e5107adaca 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1638,7 +1638,7 @@ static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
return false;
}
-static void virtnet_poll_cleantx(struct receive_queue *rq)
+static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
unsigned int index = vq2rxq(rq->vq);
@@ -1656,7 +1656,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
do {
virtqueue_disable_cb(sq->vq);
- free_old_xmit_skbs(sq, true);
+ free_old_xmit_skbs(sq, !!budget);
} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
@@ -1675,7 +1675,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
unsigned int received;
unsigned int xdp_xmit = 0;
- virtnet_poll_cleantx(rq);
+ virtnet_poll_cleantx(rq, budget);
received = virtnet_receive(rq, budget, &xdp_xmit);
@@ -1778,7 +1778,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
- free_old_xmit_skbs(sq, true);
+ free_old_xmit_skbs(sq, !!budget);
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);
--
2.39.4
devm_kasprintf() can return a NULL pointer on failure but this returned
value is not checked. Fix this lack and check the returned value.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 6ae39b7c7ed4 ("wifi: mt76: mt7921: Support temp sensor")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/net/wireless/mediatek/mt76/mt7921/init.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index ef0c721d26e3..5ab395d9d93e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -52,6 +52,8 @@ static int mt7921_thermal_init(struct mt792x_phy *phy)
name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7921_%s",
wiphy_name(wiphy));
+ if (!name)
+ return -ENOMEM;
hwmon = devm_hwmon_device_register_with_groups(&wiphy->dev, name, phy,
mt7921_hwmon_groups);
--
2.25.1
devm_kasprintf() can return a NULL pointer on failure but this returned
value is not checked. Fix this lack and check the returned value.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 6ae39b7c7ed4 ("wifi: mt76: mt7921: Support temp sensor")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/net/wireless/mediatek/mt76/mt7915/init.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index a978f434dc5e..7bc3b4cd3592 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -194,6 +194,8 @@ static int mt7915_thermal_init(struct mt7915_phy *phy)
name = devm_kasprintf(&wiphy->dev, GFP_KERNEL, "mt7915_%s",
wiphy_name(wiphy));
+ if (!name)
+ return -ENOMEM;
cdev = thermal_cooling_device_register(name, phy, &mt7915_thermal_ops);
if (!IS_ERR(cdev)) {
--
2.25.1
From: Breno Leitao <leitao(a)debian.org>
[ Upstream commit f8321fa75102246d7415a6af441872f6637c93ab ]
After the commit bdacf3e34945 ("net: Use nested-BH locking for
napi_alloc_cache.") was merged, the following warning began to appear:
WARNING: CPU: 5 PID: 1 at net/core/skbuff.c:1451 napi_skb_cache_put+0x82/0x4b0
__warn+0x12f/0x340
napi_skb_cache_put+0x82/0x4b0
napi_skb_cache_put+0x82/0x4b0
report_bug+0x165/0x370
handle_bug+0x3d/0x80
exc_invalid_op+0x1a/0x50
asm_exc_invalid_op+0x1a/0x20
__free_old_xmit+0x1c8/0x510
napi_skb_cache_put+0x82/0x4b0
__free_old_xmit+0x1c8/0x510
__free_old_xmit+0x1c8/0x510
__pfx___free_old_xmit+0x10/0x10
The issue arises because virtio is assuming it's running in NAPI context
even when it's not, such as in the netpoll case.
To resolve this, modify virtnet_poll_tx() to only set NAPI when budget
is available. Same for virtnet_poll_cleantx(), which always assumed that
it was in a NAPI context.
Fixes: df133f3f9625 ("virtio_net: bulk free tx skbs")
Suggested-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Breno Leitao <leitao(a)debian.org>
Reviewed-by: Jakub Kicinski <kuba(a)kernel.org>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Jason Wang <jasowang(a)redhat.com>
Reviewed-by: Heng Qi <hengqi(a)linux.alibaba.com>
Link: https://patch.msgid.link/20240712115325.54175-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
[Shivani: Modified to apply on v6.6.y]
Signed-off-by: Shivani Agarwal <shivani.agarwal(a)broadcom.com>
---
drivers/net/virtio_net.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 51ade909c84f..bc01f2dafa94 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2140,7 +2140,7 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
return packets;
}
-static void virtnet_poll_cleantx(struct receive_queue *rq)
+static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
unsigned int index = vq2rxq(rq->vq);
@@ -2158,7 +2158,7 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
do {
virtqueue_disable_cb(sq->vq);
- free_old_xmit_skbs(sq, true);
+ free_old_xmit_skbs(sq, !!budget);
} while (unlikely(!virtqueue_enable_cb_delayed(sq->vq)));
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
@@ -2177,7 +2177,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
unsigned int received;
unsigned int xdp_xmit = 0;
- virtnet_poll_cleantx(rq);
+ virtnet_poll_cleantx(rq, budget);
received = virtnet_receive(rq, budget, &xdp_xmit);
@@ -2280,7 +2280,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
txq = netdev_get_tx_queue(vi->dev, index);
__netif_tx_lock(txq, raw_smp_processor_id());
virtqueue_disable_cb(sq->vq);
- free_old_xmit_skbs(sq, true);
+ free_old_xmit_skbs(sq, !!budget);
if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
netif_tx_wake_queue(txq);
--
2.39.4
From: Willem de Bruijn <willemb(a)google.com>
Tighten csum_start and csum_offset checks in virtio_net_hdr_to_skb
for GSO packets.
The function already checks that a checksum requested with
VIRTIO_NET_HDR_F_NEEDS_CSUM is in skb linear. But for GSO packets
this might not hold for segs after segmentation.
Syzkaller demonstrated to reach this warning in skb_checksum_help
offset = skb_checksum_start_offset(skb);
ret = -EINVAL;
if (WARN_ON_ONCE(offset >= skb_headlen(skb)))
By injecting a TSO packet:
WARNING: CPU: 1 PID: 3539 at net/core/dev.c:3284 skb_checksum_help+0x3d0/0x5b0
ip_do_fragment+0x209/0x1b20 net/ipv4/ip_output.c:774
ip_finish_output_gso net/ipv4/ip_output.c:279 [inline]
__ip_finish_output+0x2bd/0x4b0 net/ipv4/ip_output.c:301
iptunnel_xmit+0x50c/0x930 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x2296/0x2c70 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x759/0xa60 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4850 [inline]
netdev_start_xmit include/linux/netdevice.h:4864 [inline]
xmit_one net/core/dev.c:3595 [inline]
dev_hard_start_xmit+0x261/0x8c0 net/core/dev.c:3611
__dev_queue_xmit+0x1b97/0x3c90 net/core/dev.c:4261
packet_snd net/packet/af_packet.c:3073 [inline]
The geometry of the bad input packet at tcp_gso_segment:
[ 52.003050][ T8403] skb len=12202 headroom=244 headlen=12093 tailroom=0
[ 52.003050][ T8403] mac=(168,24) mac_len=24 net=(192,52) trans=244
[ 52.003050][ T8403] shinfo(txflags=0 nr_frags=1 gso(size=1552 type=3 segs=0))
[ 52.003050][ T8403] csum(0x60000c7 start=199 offset=1536
ip_summed=3 complete_sw=0 valid=0 level=0)
Mitigate with stricter input validation.
csum_offset: for GSO packets, deduce the correct value from gso_type.
This is already done for USO. Extend it to TSO. Let UFO be:
udp[46]_ufo_fragment ignores these fields and always computes the
checksum in software.
csum_start: finding the real offset requires parsing to the transport
header. Do not add a parser, use existing segmentation parsing. Thanks
to SKB_GSO_DODGY, that also catches bad packets that are hw offloaded.
Again test both TSO and USO. Do not test UFO for the above reason, and
do not test UDP tunnel offload.
GSO packet are almost always CHECKSUM_PARTIAL. USO packets may be
CHECKSUM_NONE since commit 10154dbded6d6 ("udp: Allow GSO transmit
from devices with no checksum offload"), but then still these fields
are initialized correctly in udp4_hwcsum/udp6_hwcsum_outgoing. So no
need to test for ip_summed == CHECKSUM_PARTIAL first.
This revises an existing fix mentioned in the Fixes tag, which broke
small packets with GSO offload, as detected by kselftests.
Link: https://syzkaller.appspot.com/bug?extid=e1db31216c789f552871
Link: https://lore.kernel.org/netdev/20240723223109.2196886-1-kuba@kernel.org
Fixes: e269d79c7d35 ("net: missing check virtio")
Cc: stable(a)vger.kernel.org
Signed-off-by: Willem de Bruijn <willemb(a)google.com>
Link: https://patch.msgid.link/20240729201108.1615114-1-willemdebruijn.kernel@gma…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
(cherry picked from commit 89add40066f9ed9abe5f7f886fe5789ff7e0c50e)
---
include/linux/virtio_net.h | 22 ++++++++++++++++++++--
net/ipv4/tcp_offload.c | 3 +++
net/ipv4/udp_offload.c | 18 +++++++++++++++---
3 files changed, 38 insertions(+), 5 deletions(-)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 6047058d6703..6fe99dd3ca39 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -144,9 +144,27 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
unsigned int nh_off = p_off;
struct skb_shared_info *shinfo = skb_shinfo(skb);
- /* UFO may not include transport header in gso_size. */
- if (gso_type & SKB_GSO_UDP)
+ switch (gso_type & ~SKB_GSO_TCP_ECN) {
+ case SKB_GSO_UDP:
+ /* UFO may not include transport header in gso_size. */
nh_off -= thlen;
+ break;
+ case SKB_GSO_UDP_L4:
+ if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
+ return -EINVAL;
+ if (skb->csum_offset != offsetof(struct udphdr, check))
+ return -EINVAL;
+ if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS)
+ return -EINVAL;
+ if (gso_type != SKB_GSO_UDP_L4)
+ return -EINVAL;
+ break;
+ case SKB_GSO_TCPV4:
+ case SKB_GSO_TCPV6:
+ if (skb->csum_offset != offsetof(struct tcphdr, check))
+ return -EINVAL;
+ break;
+ }
/* Kernel has a special handling for GSO_BY_FRAGS. */
if (gso_size == GSO_BY_FRAGS)
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index fc61cd3fea65..357d3be04f84 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -71,6 +71,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
if (thlen < sizeof(*th))
goto out;
+ if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb)))
+ goto out;
+
if (!pskb_may_pull(skb, thlen))
goto out;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index a0b569d0085b..0407b829e481 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -269,13 +269,25 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
__sum16 check;
__be16 newlen;
- if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
- return __udp_gso_segment_list(gso_skb, features, is_ipv6);
-
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (unlikely(skb_checksum_start(gso_skb) !=
+ skb_transport_header(gso_skb) &&
+ !(skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)))
+ return ERR_PTR(-EINVAL);
+
+ if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
+ mss);
+ return NULL;
+ }
+
+ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+
skb_pull(gso_skb, sizeof(*uh));
/* clear destructor to avoid skb_segment assigning it to tail */
--
2.20.1
The quilt patch titled
Subject: mm: krealloc: consider spare memory for __GFP_ZERO
has been removed from the -mm tree. Its filename was
mm-krealloc-consider-spare-memory-for-__gfp_zero.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Danilo Krummrich <dakr(a)kernel.org>
Subject: mm: krealloc: consider spare memory for __GFP_ZERO
Date: Tue, 13 Aug 2024 00:34:34 +0200
As long as krealloc() is called with __GFP_ZERO consistently, starting
with the initial memory allocation, __GFP_ZERO should be fully honored.
However, if for an existing allocation krealloc() is called with a
decreased size, it is not ensured that the spare portion the allocation is
zeroed. Thus, if krealloc() is subsequently called with a larger size
again, __GFP_ZERO can't be fully honored, since we don't know the previous
size, but only the bucket size.
Example:
buf = kzalloc(64, GFP_KERNEL);
memset(buf, 0xff, 64);
buf = krealloc(buf, 48, GFP_KERNEL | __GFP_ZERO);
/* After this call the last 16 bytes are still 0xff. */
buf = krealloc(buf, 64, GFP_KERNEL | __GFP_ZERO);
Fix this, by explicitly setting spare memory to zero, when shrinking an
allocation with __GFP_ZERO flag set or init_on_alloc enabled.
Link: https://lkml.kernel.org/r/20240812223707.32049-1-dakr@kernel.org
Signed-off-by: Danilo Krummrich <dakr(a)kernel.org>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Acked-by: David Rientjes <rientjes(a)google.com>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Hyeonggon Yoo <42.hyeyoo(a)gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slab_common.c | 7 +++++++
1 file changed, 7 insertions(+)
--- a/mm/slab_common.c~mm-krealloc-consider-spare-memory-for-__gfp_zero
+++ a/mm/slab_common.c
@@ -1273,6 +1273,13 @@ __do_krealloc(const void *p, size_t new_
/* If the object still fits, repoison it precisely. */
if (ks >= new_size) {
+ /* Zero out spare memory. */
+ if (want_init_on_alloc(flags)) {
+ kasan_disable_current();
+ memset((void *)p + new_size, 0, ks - new_size);
+ kasan_enable_current();
+ }
+
p = kasan_krealloc((void *)p, new_size, flags);
return (void *)p;
}
_
Patches currently in -mm which might be from dakr(a)kernel.org are
This changes the judgement of if needing to round up the width or not,
from using the `dp_flow` to the plane's type.
The `dp_flow` can be -22(-EINVAL) even the plane is a PRIMARY one.
See `client_reg[]` in `ipu-common.c`.
[ 0.605141] [drm:ipu_plane_init] channel 28, dp flow -22, possible_crtcs=0x0
Per the commit message in commit: 71f9fd5bcf09, using the plane type for
judging if rounding up is needed is correct.
Fixes: 71f9fd5bcf09 ("drm/imx: ipuv3-plane: Fix overlay plane width")
Cc: stable(a)vger.kernel.org
Signed-off-by: Paul Pu <hui.pu(a)gehealthcare.com>
---
drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c
index 704c549750f9..cee83ac70ada 100644
--- a/drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3/ipuv3-plane.c
@@ -614,7 +614,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
break;
}
- if (ipu_plane->dp_flow == IPU_DP_FLOW_SYNC_BG)
+ if (ipu_plane->base.type == DRM_PLANE_TYPE_PRIMARY)
width = ipu_src_rect_width(new_state);
else
width = drm_rect_width(&new_state->src) >> 16;
base-commit: 431c1646e1f86b949fa3685efc50b660a364c2b6
--
2.39.2
From: Andrey Konovalov <andreyknvl(a)gmail.com>
Commit a7f3813e589f ("usb: gadget: dummy_hcd: Switch to hrtimer transfer
scheduler") switched dummy_hcd to use hrtimer and made the timer's
callback be executed in the hardirq context.
With that change, __usb_hcd_giveback_urb now gets executed in the hardirq
context, which causes problems for KCOV and KMSAN.
One problem is that KCOV now is unable to collect coverage from
the USB code that gets executed from the dummy_hcd's timer callback,
as KCOV cannot collect coverage in the hardirq context.
Another problem is that the dummy_hcd hrtimer might get triggered in the
middle of a softirq with KCOV remote coverage collection enabled, and that
causes a WARNING in KCOV, as reported by syzbot. (I sent a separate patch
to shut down this WARNING, but that doesn't fix the other two issues.)
Finally, KMSAN appears to ignore tracking memory copying operations
that happen in the hardirq context, which causes false positive
kernel-infoleaks, as reported by syzbot.
Change the hrtimer in dummy_hcd to execute the callback in the softirq
context.
Reported-by: syzbot+2388cdaeb6b10f0c13ac(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=2388cdaeb6b10f0c13ac
Reported-by: syzbot+17ca2339e34a1d863aad(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=17ca2339e34a1d863aad
Fixes: a7f3813e589f ("usb: gadget: dummy_hcd: Switch to hrtimer transfer scheduler")
Cc: stable(a)vger.kernel.org
Signed-off-by: Andrey Konovalov <andreyknvl(a)gmail.com>
---
Marcello, would this change be acceptable for your use case?
If we wanted to keep the hardirq hrtimer, we would need teach KCOV to
collect coverage in the hardirq context (or disable it, which would be
unfortunate) and also fix whatever is wrong with KMSAN, but all that
requires some work.
---
drivers/usb/gadget/udc/dummy_hcd.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c
index f37b0d8386c1a..ff7bee78bcc49 100644
--- a/drivers/usb/gadget/udc/dummy_hcd.c
+++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -1304,7 +1304,8 @@ static int dummy_urb_enqueue(
/* kick the scheduler, it'll do the rest */
if (!hrtimer_active(&dum_hcd->timer))
- hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS), HRTIMER_MODE_REL);
+ hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS),
+ HRTIMER_MODE_REL_SOFT);
done:
spin_unlock_irqrestore(&dum_hcd->dum->lock, flags);
@@ -1325,7 +1326,7 @@ static int dummy_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
rc = usb_hcd_check_unlink_urb(hcd, urb, status);
if (!rc && dum_hcd->rh_state != DUMMY_RH_RUNNING &&
!list_empty(&dum_hcd->urbp_list))
- hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL);
+ hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL_SOFT);
spin_unlock_irqrestore(&dum_hcd->dum->lock, flags);
return rc;
@@ -1995,7 +1996,8 @@ static enum hrtimer_restart dummy_timer(struct hrtimer *t)
dum_hcd->udev = NULL;
} else if (dum_hcd->rh_state == DUMMY_RH_RUNNING) {
/* want a 1 msec delay here */
- hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS), HRTIMER_MODE_REL);
+ hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS),
+ HRTIMER_MODE_REL_SOFT);
}
spin_unlock_irqrestore(&dum->lock, flags);
@@ -2389,7 +2391,7 @@ static int dummy_bus_resume(struct usb_hcd *hcd)
dum_hcd->rh_state = DUMMY_RH_RUNNING;
set_link_state(dum_hcd);
if (!list_empty(&dum_hcd->urbp_list))
- hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL);
+ hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL_SOFT);
hcd->state = HC_STATE_RUNNING;
}
spin_unlock_irq(&dum_hcd->dum->lock);
@@ -2467,7 +2469,7 @@ static DEVICE_ATTR_RO(urbs);
static int dummy_start_ss(struct dummy_hcd *dum_hcd)
{
- hrtimer_init(&dum_hcd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&dum_hcd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
dum_hcd->timer.function = dummy_timer;
dum_hcd->rh_state = DUMMY_RH_RUNNING;
dum_hcd->stream_en_ep = 0;
@@ -2497,7 +2499,7 @@ static int dummy_start(struct usb_hcd *hcd)
return dummy_start_ss(dum_hcd);
spin_lock_init(&dum_hcd->dum->lock);
- hrtimer_init(&dum_hcd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&dum_hcd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
dum_hcd->timer.function = dummy_timer;
dum_hcd->rh_state = DUMMY_RH_RUNNING;
--
2.25.1
From: Andrey Konovalov <andreyknvl(a)gmail.com>
Commit a7f3813e589f ("usb: gadget: dummy_hcd: Switch to hrtimer transfer
scheduler") switched dummy_hcd to use hrtimer and made the timer's
callback be executed in the hardirq context.
With that change, __usb_hcd_giveback_urb now gets executed in the hardirq
context, which causes problems for KCOV and KMSAN.
One problem is that KCOV now is unable to collect coverage from
the USB code that gets executed from the dummy_hcd's timer callback,
as KCOV cannot collect coverage in the hardirq context.
Another problem is that the dummy_hcd hrtimer might get triggered in the
middle of a softirq with KCOV remote coverage collection enabled, and that
causes a WARNING in KCOV, as reported by syzbot. (I sent a separate patch
to shut down this WARNING, but that doesn't fix the other two issues.)
Finally, KMSAN appears to ignore tracking memory copying operations
that happen in the hardirq context, which causes false positive
kernel-infoleaks, as reported by syzbot.
Change the hrtimer in dummy_hcd to execute the callback in the softirq
context.
Reported-by: syzbot+2388cdaeb6b10f0c13ac(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=2388cdaeb6b10f0c13ac
Reported-by: syzbot+17ca2339e34a1d863aad(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=17ca2339e34a1d863aad
Reported-by: syzbot+c793a7eca38803212c61(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c793a7eca38803212c61
Reported-by: syzbot+1e6e0b916b211bee1bd6(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=1e6e0b916b211bee1bd6
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202406141323.413a90d2-lkp@intel.com
Fixes: a7f3813e589f ("usb: gadget: dummy_hcd: Switch to hrtimer transfer scheduler")
Cc: stable(a)vger.kernel.org
Acked-by: Marcello Sylvester Bauer <sylv(a)sylv.io>
Signed-off-by: Andrey Konovalov <andreyknvl(a)gmail.com>
---
No difference to v1 except a few more tags.
---
drivers/usb/gadget/udc/dummy_hcd.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c
index f37b0d8386c1a..ff7bee78bcc49 100644
--- a/drivers/usb/gadget/udc/dummy_hcd.c
+++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -1304,7 +1304,8 @@ static int dummy_urb_enqueue(
/* kick the scheduler, it'll do the rest */
if (!hrtimer_active(&dum_hcd->timer))
- hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS), HRTIMER_MODE_REL);
+ hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS),
+ HRTIMER_MODE_REL_SOFT);
done:
spin_unlock_irqrestore(&dum_hcd->dum->lock, flags);
@@ -1325,7 +1326,7 @@ static int dummy_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
rc = usb_hcd_check_unlink_urb(hcd, urb, status);
if (!rc && dum_hcd->rh_state != DUMMY_RH_RUNNING &&
!list_empty(&dum_hcd->urbp_list))
- hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL);
+ hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL_SOFT);
spin_unlock_irqrestore(&dum_hcd->dum->lock, flags);
return rc;
@@ -1995,7 +1996,8 @@ static enum hrtimer_restart dummy_timer(struct hrtimer *t)
dum_hcd->udev = NULL;
} else if (dum_hcd->rh_state == DUMMY_RH_RUNNING) {
/* want a 1 msec delay here */
- hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS), HRTIMER_MODE_REL);
+ hrtimer_start(&dum_hcd->timer, ns_to_ktime(DUMMY_TIMER_INT_NSECS),
+ HRTIMER_MODE_REL_SOFT);
}
spin_unlock_irqrestore(&dum->lock, flags);
@@ -2389,7 +2391,7 @@ static int dummy_bus_resume(struct usb_hcd *hcd)
dum_hcd->rh_state = DUMMY_RH_RUNNING;
set_link_state(dum_hcd);
if (!list_empty(&dum_hcd->urbp_list))
- hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL);
+ hrtimer_start(&dum_hcd->timer, ns_to_ktime(0), HRTIMER_MODE_REL_SOFT);
hcd->state = HC_STATE_RUNNING;
}
spin_unlock_irq(&dum_hcd->dum->lock);
@@ -2467,7 +2469,7 @@ static DEVICE_ATTR_RO(urbs);
static int dummy_start_ss(struct dummy_hcd *dum_hcd)
{
- hrtimer_init(&dum_hcd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&dum_hcd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
dum_hcd->timer.function = dummy_timer;
dum_hcd->rh_state = DUMMY_RH_RUNNING;
dum_hcd->stream_en_ep = 0;
@@ -2497,7 +2499,7 @@ static int dummy_start(struct usb_hcd *hcd)
return dummy_start_ss(dum_hcd);
spin_lock_init(&dum_hcd->dum->lock);
- hrtimer_init(&dum_hcd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&dum_hcd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
dum_hcd->timer.function = dummy_timer;
dum_hcd->rh_state = DUMMY_RH_RUNNING;
--
2.25.1
Return devm_of_clk_add_hw_provider() in order to transfer the error, if it
fails due to resource allocation failure or device tree clock provider
registration failure.
Cc: stable(a)vger.kernel.org
Fixes: ebbfabc16d23 ("ASoC: rt5682: Add CCF usage for providing I2S clks")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
sound/soc/codecs/rt5682.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index e3aca9c785a0..aa163ec40862 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -2903,8 +2903,10 @@ int rt5682_register_dai_clks(struct rt5682_priv *rt5682)
}
if (dev->of_node) {
- devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get,
+ ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get,
dai_clk_hw);
+ if (ret)
+ return ret;
} else {
ret = devm_clk_hw_register_clkdev(dev, dai_clk_hw,
init.name,
--
2.25.1
The quilt patch titled
Subject: mm/memcontrol: respect zswap.writeback setting from parent cg too
has been removed from the -mm tree. Its filename was
mm-memcontrol-respect-zswapwriteback-setting-from-parent-cg-too.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Mike Yuan <me(a)yhndnzj.com>
Subject: mm/memcontrol: respect zswap.writeback setting from parent cg too
Date: Fri, 23 Aug 2024 16:27:06 +0000
Currently, the behavior of zswap.writeback wrt. the cgroup hierarchy
seems a bit odd. Unlike zswap.max, it doesn't honor the value from parent
cgroups. This surfaced when people tried to globally disable zswap
writeback, i.e. reserve physical swap space only for hibernation [1] -
disabling zswap.writeback only for the root cgroup results in subcgroups
with zswap.writeback=1 still performing writeback.
The inconsistency became more noticeable after I introduced the
MemoryZSwapWriteback= systemd unit setting [2] for controlling the knob.
The patch assumed that the kernel would enforce the value of parent
cgroups. It could probably be workarounded from systemd's side, by going
up the slice unit tree and inheriting the value. Yet I think it's more
sensible to make it behave consistently with zswap.max and friends.
[1] https://wiki.archlinux.org/title/Power_management/Suspend_and_hibernate#Dis…
[2] https://github.com/systemd/systemd/pull/31734
Link: https://lkml.kernel.org/r/20240823162506.12117-1-me@yhndnzj.com
Fixes: 501a06fe8e4c ("zswap: memcontrol: implement zswap writeback disabling")
Signed-off-by: Mike Yuan <me(a)yhndnzj.com>
Reviewed-by: Nhat Pham <nphamcs(a)gmail.com>
Acked-by: Yosry Ahmed <yosryahmed(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Michal Koutn�� <mkoutny(a)suse.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
Documentation/admin-guide/cgroup-v2.rst | 7 ++++---
mm/memcontrol.c | 12 +++++++++---
2 files changed, 13 insertions(+), 6 deletions(-)
--- a/Documentation/admin-guide/cgroup-v2.rst~mm-memcontrol-respect-zswapwriteback-setting-from-parent-cg-too
+++ a/Documentation/admin-guide/cgroup-v2.rst
@@ -1717,9 +1717,10 @@ The following nested keys are defined.
entries fault back in or are written out to disk.
memory.zswap.writeback
- A read-write single value file. The default value is "1". The
- initial value of the root cgroup is 1, and when a new cgroup is
- created, it inherits the current value of its parent.
+ A read-write single value file. The default value is "1".
+ Note that this setting is hierarchical, i.e. the writeback would be
+ implicitly disabled for child cgroups if the upper hierarchy
+ does so.
When this is set to 0, all swapping attempts to swapping devices
are disabled. This included both zswap writebacks, and swapping due
--- a/mm/memcontrol.c~mm-memcontrol-respect-zswapwriteback-setting-from-parent-cg-too
+++ a/mm/memcontrol.c
@@ -3613,8 +3613,7 @@ mem_cgroup_css_alloc(struct cgroup_subsy
memcg1_soft_limit_reset(memcg);
#ifdef CONFIG_ZSWAP
memcg->zswap_max = PAGE_COUNTER_MAX;
- WRITE_ONCE(memcg->zswap_writeback,
- !parent || READ_ONCE(parent->zswap_writeback));
+ WRITE_ONCE(memcg->zswap_writeback, true);
#endif
page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
if (parent) {
@@ -5320,7 +5319,14 @@ void obj_cgroup_uncharge_zswap(struct ob
bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
{
/* if zswap is disabled, do not block pages going to the swapping device */
- return !zswap_is_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback);
+ if (!zswap_is_enabled())
+ return true;
+
+ for (; memcg; memcg = parent_mem_cgroup(memcg))
+ if (!READ_ONCE(memcg->zswap_writeback))
+ return false;
+
+ return true;
}
static u64 zswap_current_read(struct cgroup_subsys_state *css,
_
Patches currently in -mm which might be from me(a)yhndnzj.com are
documentation-cgroup-v2-clarify-that-zswapwriteback-is-ignored-if-zswap-is-disabled.patch
selftests-test_zswap-add-test-for-hierarchical-zswapwriteback.patch
Xen domU instances do not boot on x86 with ACPI enabled, so the entire
ACPI subsystem is ultimately disabled. This causes acpi_mps_check()
to trigger a warning that the ACPI MPS table is not present, which then
disables APIC support on domU, breaking the CPU topology detection for
all vCPUs other than the boot vCPU.
Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management")
Signed-off-by: Ariadne Conill <ariadne(a)ariadne.space>
Cc: x86(a)kernel.org
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: linux-kernel(a)vger.kernel.org
Cc: stable(a)vger.kernel.org
---
arch/x86/kernel/cpu/topology.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 621a151ccf7d..38fa5ed816d6 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -429,7 +429,7 @@ void __init topology_apply_cmdline_limits_early(void)
unsigned int possible = nr_cpu_ids;
/* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
- if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
+ if (!setup_max_cpus || ioapic_is_disabled || (apic_is_disabled && !xen_pv_domain()))
possible = 1;
/* 'possible_cpus=N' */
--
2.39.2
Hi Sasha,
I hope you are well!
If I'm not mistaken, I think you are the person managing the script
updating the 'queue/*' branches from the linux-stable-rc git tree, right?
It looks like the branch for the v6.1 is in fact tracking the v6.10
kernel instead:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/…
Are you the person who is able to fix that? If not, do you know who can
fix that?
These branches are really useful, not to have to apply patches from the
'stable-queue' tree. Thank you!
I also just noticed that they are used by CIs, e.g. the one from Linaro:
https://qa-reports.linaro.org/lkft/linux-stable-rc-queues-queue_6.1/
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
To check if mmc cqe is in halt state, need to check set/clear of CQHCI_HALT
bit. At this time, we need to check with &, not &&.
Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host")
Cc: stable(a)vger.kernel.org
Signed-off-by: Seunghwan Baek <sh8267.baek(a)samsung.com>
---
drivers/mmc/host/cqhci-core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c
index c14d7251d0bb..a02da26a1efd 100644
--- a/drivers/mmc/host/cqhci-core.c
+++ b/drivers/mmc/host/cqhci-core.c
@@ -617,7 +617,7 @@ static int cqhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
cqhci_writel(cq_host, 0, CQHCI_CTL);
mmc->cqe_on = true;
pr_debug("%s: cqhci: CQE on\n", mmc_hostname(mmc));
- if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) {
+ if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) {
pr_err("%s: cqhci: CQE failed to exit halt state\n",
mmc_hostname(mmc));
}
--
2.17.1
The panasonic laptop code in various places uses the sinf array with index
values of 0 - SINF_CUR_BRIGHT(0x0d) without checking that the sinf array
is big enough.
Check for a minimum SQTY value of SINF_CUR_BRIGHT to avoid out of bounds
accesses of the sinf array.
Note SQTY returning SINF_CUR_BRIGHT is ok because the driver adds one extra
entry to the sinf array.
Fixes: e424fb8cc4e6 ("panasonic-laptop: avoid overflow in acpi_pcc_hotkey_add()")
Cc: stable(a)vger.kernel.org
Tested-by: James Harmison <jharmison(a)redhat.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/platform/x86/panasonic-laptop.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c
index cf845ee1c7b1..d7f9017a5a13 100644
--- a/drivers/platform/x86/panasonic-laptop.c
+++ b/drivers/platform/x86/panasonic-laptop.c
@@ -963,8 +963,8 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device)
num_sifr = acpi_pcc_get_sqty(device);
- if (num_sifr < 0 || num_sifr > 255) {
- pr_err("num_sifr out of range");
+ if (num_sifr < SINF_CUR_BRIGHT || num_sifr > 255) {
+ pr_err("num_sifr %d out of range %d - 255\n", num_sifr, SINF_CUR_BRIGHT);
return -ENODEV;
}
--
2.46.0
This is a note to let you know that I've just added the patch titled
iio: adc: ad7173: fix GPIO device info
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From f242967f4d1c024ac42bb47ea50b6360b4cb4556 Mon Sep 17 00:00:00 2001
From: Dumitru Ceclan <mitrutzceclan(a)gmail.com>
Date: Fri, 9 Aug 2024 16:49:08 +0300
Subject: iio: adc: ad7173: fix GPIO device info
Models AD4114/5/6 have .higher_gpio_bits = true. This is not correct as
the only models that have the GPIO bits to a higher position are AD4111/2.
Fix by removing the higher_gpio_bits = true from the AD4114/5/6 models.
Fixes: 13d12e3ad12d ("iio: adc: ad7173: Add support for AD411x devices")
Signed-off-by: Dumitru Ceclan <dumitru.ceclan(a)analog.com>
Link: https://patch.msgid.link/20240809134909.26829-1-dumitru.ceclan@analog.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad7173.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c
index a854f2d30174..0702ec71aa29 100644
--- a/drivers/iio/adc/ad7173.c
+++ b/drivers/iio/adc/ad7173.c
@@ -302,7 +302,6 @@ static const struct ad7173_device_info ad4114_device_info = {
.num_configs = 8,
.num_voltage_in = 16,
.num_gpios = 4,
- .higher_gpio_bits = true,
.has_vincom_input = true,
.has_temp = true,
.has_input_buf = true,
@@ -320,7 +319,6 @@ static const struct ad7173_device_info ad4115_device_info = {
.num_configs = 8,
.num_voltage_in = 16,
.num_gpios = 4,
- .higher_gpio_bits = true,
.has_vincom_input = true,
.has_temp = true,
.has_input_buf = true,
@@ -338,7 +336,6 @@ static const struct ad7173_device_info ad4116_device_info = {
.num_configs = 8,
.num_voltage_in = 16,
.num_gpios = 4,
- .higher_gpio_bits = true,
.has_vincom_input = true,
.has_temp = true,
.has_input_buf = true,
--
2.46.0
This is a note to let you know that I've just added the patch titled
iio: adc: ad7124: fix DT configuration parsing
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 61cbfb5368dd50ed0d65ce21d305aa923581db2b Mon Sep 17 00:00:00 2001
From: Dumitru Ceclan <mitrutzceclan(a)gmail.com>
Date: Tue, 6 Aug 2024 11:51:33 +0300
Subject: iio: adc: ad7124: fix DT configuration parsing
The cfg pointer is set before reading the channel number that the
configuration should point to. This causes configurations to be shifted
by one channel.
For example setting bipolar to the first channel defined in the DT will
cause bipolar mode to be active on the second defined channel.
Fix by moving the cfg pointer setting after reading the channel number.
Fixes: 7b8d045e497a ("iio: adc: ad7124: allow more than 8 channels")
Signed-off-by: Dumitru Ceclan <dumitru.ceclan(a)analog.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://patch.msgid.link/20240806085133.114547-1-dumitru.ceclan@analog.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad7124.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index afb5f4d741e6..108e9ccab1ef 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -844,8 +844,6 @@ static int ad7124_parse_channel_config(struct iio_dev *indio_dev,
st->channels = channels;
device_for_each_child_node_scoped(dev, child) {
- cfg = &st->channels[channel].cfg;
-
ret = fwnode_property_read_u32(child, "reg", &channel);
if (ret)
return ret;
@@ -863,6 +861,7 @@ static int ad7124_parse_channel_config(struct iio_dev *indio_dev,
st->channels[channel].ain = AD7124_CHANNEL_AINP(ain[0]) |
AD7124_CHANNEL_AINM(ain[1]);
+ cfg = &st->channels[channel].cfg;
cfg->bipolar = fwnode_property_read_bool(child, "bipolar");
ret = fwnode_property_read_u32(child, "adi,reference-select", &tmp);
--
2.46.0
This is a note to let you know that I've just added the patch titled
iio: adc: ad_sigma_delta: fix irq_flags on irq request
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From e81bb580ec08d7503c14c92157d810d306290003 Mon Sep 17 00:00:00 2001
From: Nuno Sa <nuno.sa(a)analog.com>
Date: Tue, 6 Aug 2024 17:40:49 +0200
Subject: iio: adc: ad_sigma_delta: fix irq_flags on irq request
With commit 7b0c9f8fa3d2 ("iio: adc: ad_sigma_delta: Add optional irq
selection"), we can get the irq line from struct ad_sigma_delta_info
instead of the spi device. However, in devm_ad_sd_probe_trigger(), when
getting the irq_flags with irq_get_trigger_type() we are still using
the spi device irq instead of the one used for devm_request_irq().
Fixes: 7b0c9f8fa3d2 ("iio: adc: ad_sigma_delta: Add optional irq selection")
Signed-off-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://patch.msgid.link/20240806-dev-fix-ad-sigma-delta-v1-1-aa25b173c063@…
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad_sigma_delta.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c
index 8c062b0d26e3..dcd557e93586 100644
--- a/drivers/iio/adc/ad_sigma_delta.c
+++ b/drivers/iio/adc/ad_sigma_delta.c
@@ -569,7 +569,7 @@ EXPORT_SYMBOL_NS_GPL(ad_sd_validate_trigger, IIO_AD_SIGMA_DELTA);
static int devm_ad_sd_probe_trigger(struct device *dev, struct iio_dev *indio_dev)
{
struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev);
- unsigned long irq_flags = irq_get_trigger_type(sigma_delta->spi->irq);
+ unsigned long irq_flags = irq_get_trigger_type(sigma_delta->irq_line);
int ret;
if (dev != &sigma_delta->spi->dev) {
--
2.46.0
This is a note to let you know that I've just added the patch titled
iio: fix scale application in iio_convert_raw_to_processed_unlocked
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 8a3dcc970dc57b358c8db2702447bf0af4e0d83a Mon Sep 17 00:00:00 2001
From: Matteo Martelli <matteomartelli3(a)gmail.com>
Date: Tue, 30 Jul 2024 10:11:53 +0200
Subject: iio: fix scale application in iio_convert_raw_to_processed_unlocked
When the scale_type is IIO_VAL_INT_PLUS_MICRO or IIO_VAL_INT_PLUS_NANO
the scale passed as argument is only applied to the fractional part of
the value. Fix it by also multiplying the integer part by the scale
provided.
Fixes: 48e44ce0f881 ("iio:inkern: Add function to read the processed value")
Signed-off-by: Matteo Martelli <matteomartelli3(a)gmail.com>
Link: https://patch.msgid.link/20240730-iio-fix-scale-v1-1-6246638c8daa@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/inkern.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index 9f484c94bc6e..151099be2863 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -647,17 +647,17 @@ static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan,
break;
case IIO_VAL_INT_PLUS_MICRO:
if (scale_val2 < 0)
- *processed = -raw64 * scale_val;
+ *processed = -raw64 * scale_val * scale;
else
- *processed = raw64 * scale_val;
+ *processed = raw64 * scale_val * scale;
*processed += div_s64(raw64 * (s64)scale_val2 * scale,
1000000LL);
break;
case IIO_VAL_INT_PLUS_NANO:
if (scale_val2 < 0)
- *processed = -raw64 * scale_val;
+ *processed = -raw64 * scale_val * scale;
else
- *processed = raw64 * scale_val;
+ *processed = raw64 * scale_val * scale;
*processed += div_s64(raw64 * (s64)scale_val2 * scale,
1000000000LL);
break;
--
2.46.0
This is a note to let you know that I've just added the patch titled
iio: adc: ad7124: fix config comparison
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 2f6b92d0f69f04d9e2ea0db1228ab7f82f3173af Mon Sep 17 00:00:00 2001
From: Dumitru Ceclan <mitrutzceclan(a)gmail.com>
Date: Wed, 31 Jul 2024 15:37:23 +0300
Subject: iio: adc: ad7124: fix config comparison
The ad7124_find_similar_live_cfg() computes the compare size by
substracting the address of the cfg struct from the address of the live
field. Because the live field is the first field in the struct, the
result is 0.
Also, the memcmp() call is made from the start of the cfg struct, which
includes the live and cfg_slot fields, which are not relevant for the
comparison.
Fix by grouping the relevant fields with struct_group() and use the
size of the group to compute the compare size; make the memcmp() call
from the address of the group.
Fixes: 7b8d045e497a ("iio: adc: ad7124: allow more than 8 channels")
Signed-off-by: Dumitru Ceclan <dumitru.ceclan(a)analog.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://patch.msgid.link/20240731-ad7124-fix-v1-2-46a76aa4b9be@analog.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad7124.c | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index c0b82f64c976..afb5f4d741e6 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -147,15 +147,18 @@ struct ad7124_chip_info {
struct ad7124_channel_config {
bool live;
unsigned int cfg_slot;
- enum ad7124_ref_sel refsel;
- bool bipolar;
- bool buf_positive;
- bool buf_negative;
- unsigned int vref_mv;
- unsigned int pga_bits;
- unsigned int odr;
- unsigned int odr_sel_bits;
- unsigned int filter_type;
+ /* Following fields are used to compare equality. */
+ struct_group(config_props,
+ enum ad7124_ref_sel refsel;
+ bool bipolar;
+ bool buf_positive;
+ bool buf_negative;
+ unsigned int vref_mv;
+ unsigned int pga_bits;
+ unsigned int odr;
+ unsigned int odr_sel_bits;
+ unsigned int filter_type;
+ );
};
struct ad7124_channel {
@@ -334,11 +337,12 @@ static struct ad7124_channel_config *ad7124_find_similar_live_cfg(struct ad7124_
ptrdiff_t cmp_size;
int i;
- cmp_size = (u8 *)&cfg->live - (u8 *)cfg;
+ cmp_size = sizeof_field(struct ad7124_channel_config, config_props);
for (i = 0; i < st->num_channels; i++) {
cfg_aux = &st->channels[i].cfg;
- if (cfg_aux->live && !memcmp(cfg, cfg_aux, cmp_size))
+ if (cfg_aux->live &&
+ !memcmp(&cfg->config_props, &cfg_aux->config_props, cmp_size))
return cfg_aux;
}
--
2.46.0
This is a note to let you know that I've just added the patch titled
iio: adc: ad7124: fix chip ID mismatch
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 96f9ab0d5933c1c00142dd052f259fce0bc3ced2 Mon Sep 17 00:00:00 2001
From: Dumitru Ceclan <mitrutzceclan(a)gmail.com>
Date: Wed, 31 Jul 2024 15:37:22 +0300
Subject: iio: adc: ad7124: fix chip ID mismatch
The ad7124_soft_reset() function has the assumption that the chip will
assert the "power-on reset" bit in the STATUS register after a software
reset without any delay. The POR bit =0 is used to check if the chip
initialization is done.
A chip ID mismatch probe error appears intermittently when the probe
continues too soon and the ID register does not contain the expected
value.
Fix by adding a 200us delay after the software reset command is issued.
Fixes: b3af341bbd96 ("iio: adc: Add ad7124 support")
Signed-off-by: Dumitru Ceclan <dumitru.ceclan(a)analog.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://patch.msgid.link/20240731-ad7124-fix-v1-1-46a76aa4b9be@analog.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad7124.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index 3beed78496c5..c0b82f64c976 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -764,6 +764,7 @@ static int ad7124_soft_reset(struct ad7124_state *st)
if (ret < 0)
return ret;
+ fsleep(200);
timeout = 100;
do {
ret = ad_sd_read_reg(&st->sd, AD7124_STATUS, 1, &readval);
--
2.46.0
This is a note to let you know that I've just added the patch titled
iio: adc: ad7606: remove frstdata check for serial mode
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 90826e08468ba7fb35d8b39645b22d9e80004afe Mon Sep 17 00:00:00 2001
From: Guillaume Stols <gstols(a)baylibre.com>
Date: Tue, 2 Jul 2024 12:52:51 +0000
Subject: iio: adc: ad7606: remove frstdata check for serial mode
The current implementation attempts to recover from an eventual glitch
in the clock by checking frstdata state after reading the first
channel's sample: If frstdata is low, it will reset the chip and
return -EIO.
This will only work in parallel mode, where frstdata pin is set low
after the 2nd sample read starts.
For the serial mode, according to the datasheet, "The FRSTDATA output
returns to a logic low following the 16th SCLK falling edge.", thus
after the Xth pulse, X being the number of bits in a sample, the check
will always be true, and the driver will not work at all in serial
mode if frstdata(optional) is defined in the devicetree as it will
reset the chip, and return -EIO every time read_sample is called.
Hence, this check must be removed for serial mode.
Fixes: b9618c0cacd7 ("staging: IIO: ADC: New driver for AD7606/AD7606-6/AD7606-4")
Signed-off-by: Guillaume Stols <gstols(a)baylibre.com>
Reviewed-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://patch.msgid.link/20240702-cleanup-ad7606-v3-1-18d5ea18770e@baylibre…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/adc/ad7606.c | 28 ++--------------------
drivers/iio/adc/ad7606.h | 2 ++
drivers/iio/adc/ad7606_par.c | 46 ++++++++++++++++++++++++++++++++++--
3 files changed, 48 insertions(+), 28 deletions(-)
diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c
index 3a417595294f..c321c6ef48df 100644
--- a/drivers/iio/adc/ad7606.c
+++ b/drivers/iio/adc/ad7606.c
@@ -49,7 +49,7 @@ static const unsigned int ad7616_oversampling_avail[8] = {
1, 2, 4, 8, 16, 32, 64, 128,
};
-static int ad7606_reset(struct ad7606_state *st)
+int ad7606_reset(struct ad7606_state *st)
{
if (st->gpio_reset) {
gpiod_set_value(st->gpio_reset, 1);
@@ -60,6 +60,7 @@ static int ad7606_reset(struct ad7606_state *st)
return -ENODEV;
}
+EXPORT_SYMBOL_NS_GPL(ad7606_reset, IIO_AD7606);
static int ad7606_reg_access(struct iio_dev *indio_dev,
unsigned int reg,
@@ -88,31 +89,6 @@ static int ad7606_read_samples(struct ad7606_state *st)
{
unsigned int num = st->chip_info->num_channels - 1;
u16 *data = st->data;
- int ret;
-
- /*
- * The frstdata signal is set to high while and after reading the sample
- * of the first channel and low for all other channels. This can be used
- * to check that the incoming data is correctly aligned. During normal
- * operation the data should never become unaligned, but some glitch or
- * electrostatic discharge might cause an extra read or clock cycle.
- * Monitoring the frstdata signal allows to recover from such failure
- * situations.
- */
-
- if (st->gpio_frstdata) {
- ret = st->bops->read_block(st->dev, 1, data);
- if (ret)
- return ret;
-
- if (!gpiod_get_value(st->gpio_frstdata)) {
- ad7606_reset(st);
- return -EIO;
- }
-
- data++;
- num--;
- }
return st->bops->read_block(st->dev, num, data);
}
diff --git a/drivers/iio/adc/ad7606.h b/drivers/iio/adc/ad7606.h
index 0c6a88cc4695..6649e84d25de 100644
--- a/drivers/iio/adc/ad7606.h
+++ b/drivers/iio/adc/ad7606.h
@@ -151,6 +151,8 @@ int ad7606_probe(struct device *dev, int irq, void __iomem *base_address,
const char *name, unsigned int id,
const struct ad7606_bus_ops *bops);
+int ad7606_reset(struct ad7606_state *st);
+
enum ad7606_supported_device_ids {
ID_AD7605_4,
ID_AD7606_8,
diff --git a/drivers/iio/adc/ad7606_par.c b/drivers/iio/adc/ad7606_par.c
index d8408052262e..6bc587b20f05 100644
--- a/drivers/iio/adc/ad7606_par.c
+++ b/drivers/iio/adc/ad7606_par.c
@@ -7,6 +7,7 @@
#include <linux/mod_devicetable.h>
#include <linux/module.h>
+#include <linux/gpio/consumer.h>
#include <linux/platform_device.h>
#include <linux/types.h>
#include <linux/err.h>
@@ -21,8 +22,29 @@ static int ad7606_par16_read_block(struct device *dev,
struct iio_dev *indio_dev = dev_get_drvdata(dev);
struct ad7606_state *st = iio_priv(indio_dev);
- insw((unsigned long)st->base_address, buf, count);
+ /*
+ * On the parallel interface, the frstdata signal is set to high while
+ * and after reading the sample of the first channel and low for all
+ * other channels. This can be used to check that the incoming data is
+ * correctly aligned. During normal operation the data should never
+ * become unaligned, but some glitch or electrostatic discharge might
+ * cause an extra read or clock cycle. Monitoring the frstdata signal
+ * allows to recover from such failure situations.
+ */
+ int num = count;
+ u16 *_buf = buf;
+
+ if (st->gpio_frstdata) {
+ insw((unsigned long)st->base_address, _buf, 1);
+ if (!gpiod_get_value(st->gpio_frstdata)) {
+ ad7606_reset(st);
+ return -EIO;
+ }
+ _buf++;
+ num--;
+ }
+ insw((unsigned long)st->base_address, _buf, num);
return 0;
}
@@ -35,8 +57,28 @@ static int ad7606_par8_read_block(struct device *dev,
{
struct iio_dev *indio_dev = dev_get_drvdata(dev);
struct ad7606_state *st = iio_priv(indio_dev);
+ /*
+ * On the parallel interface, the frstdata signal is set to high while
+ * and after reading the sample of the first channel and low for all
+ * other channels. This can be used to check that the incoming data is
+ * correctly aligned. During normal operation the data should never
+ * become unaligned, but some glitch or electrostatic discharge might
+ * cause an extra read or clock cycle. Monitoring the frstdata signal
+ * allows to recover from such failure situations.
+ */
+ int num = count;
+ u16 *_buf = buf;
- insb((unsigned long)st->base_address, buf, count * 2);
+ if (st->gpio_frstdata) {
+ insb((unsigned long)st->base_address, _buf, 2);
+ if (!gpiod_get_value(st->gpio_frstdata)) {
+ ad7606_reset(st);
+ return -EIO;
+ }
+ _buf++;
+ num--;
+ }
+ insb((unsigned long)st->base_address, _buf, num * 2);
return 0;
}
--
2.46.0
This is a note to let you know that I've just added the patch titled
iio: buffer-dmaengine: fix releasing dma channel on error
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 84c65d8008764a8fb4e627ff02de01ec4245f2c4 Mon Sep 17 00:00:00 2001
From: David Lechner <dlechner(a)baylibre.com>
Date: Tue, 23 Jul 2024 11:32:21 -0500
Subject: iio: buffer-dmaengine: fix releasing dma channel on error
If dma_get_slave_caps() fails, we need to release the dma channel before
returning an error to avoid leaking the channel.
Fixes: 2d6ca60f3284 ("iio: Add a DMAengine framework based buffer")
Signed-off-by: David Lechner <dlechner(a)baylibre.com>
Link: https://patch.msgid.link/20240723-iio-fix-dmaengine-free-on-error-v1-1-2c7c…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/buffer/industrialio-buffer-dmaengine.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/buffer/industrialio-buffer-dmaengine.c b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
index 12aa1412dfa0..426cc614587a 100644
--- a/drivers/iio/buffer/industrialio-buffer-dmaengine.c
+++ b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
@@ -237,7 +237,7 @@ static struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev,
ret = dma_get_slave_caps(chan, &caps);
if (ret < 0)
- goto err_free;
+ goto err_release;
/* Needs to be aligned to the maximum of the minimums */
if (caps.src_addr_widths)
@@ -263,6 +263,8 @@ static struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev,
return &dmaengine_buffer->queue.buffer;
+err_release:
+ dma_release_channel(chan);
err_free:
kfree(dmaengine_buffer);
return ERR_PTR(ret);
--
2.46.0
This is a note to let you know that I've just added the patch titled
staging: iio: frequency: ad9834: Validate frequency parameter value
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From b48aa991758999d4e8f9296c5bbe388f293ef465 Mon Sep 17 00:00:00 2001
From: Aleksandr Mishin <amishin(a)t-argos.ru>
Date: Wed, 3 Jul 2024 18:45:06 +0300
Subject: staging: iio: frequency: ad9834: Validate frequency parameter value
In ad9834_write_frequency() clk_get_rate() can return 0. In such case
ad9834_calc_freqreg() call will lead to division by zero. Checking
'if (fout > (clk_freq / 2))' doesn't protect in case of 'fout' is 0.
ad9834_write_frequency() is called from ad9834_write(), where fout is
taken from text buffer, which can contain any value.
Modify parameters checking.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: 12b9d5bf76bf ("Staging: IIO: DDS: AD9833 / AD9834 driver")
Suggested-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Signed-off-by: Aleksandr Mishin <amishin(a)t-argos.ru>
Reviewed-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Link: https://patch.msgid.link/20240703154506.25584-1-amishin@t-argos.ru
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/staging/iio/frequency/ad9834.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/iio/frequency/ad9834.c b/drivers/staging/iio/frequency/ad9834.c
index a7a5cdcc6590..47e7d7e6d920 100644
--- a/drivers/staging/iio/frequency/ad9834.c
+++ b/drivers/staging/iio/frequency/ad9834.c
@@ -114,7 +114,7 @@ static int ad9834_write_frequency(struct ad9834_state *st,
clk_freq = clk_get_rate(st->mclk);
- if (fout > (clk_freq / 2))
+ if (!clk_freq || fout > (clk_freq / 2))
return -EINVAL;
regval = ad9834_calc_freqreg(clk_freq, fout);
--
2.46.0
This is a note to let you know that I've just added the patch titled
iio: pressure: bmp280: Fix waiting time for BMP3xx configuration
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-next branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will also be merged in the next major kernel release
during the merge window.
If you have any questions about this process, please let me know.
From 262a6634bcc4f0c1c53d13aa89882909f281a6aa Mon Sep 17 00:00:00 2001
From: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Date: Thu, 11 Jul 2024 23:15:50 +0200
Subject: iio: pressure: bmp280: Fix waiting time for BMP3xx configuration
According to the datasheet, both pressure and temperature can go up to
oversampling x32. With this option, the maximum measurement time is not
80ms (this is for press x32 and temp x2), but it is 130ms nominal
(calculated from table 3.9.2) and since most of the maximum values
are around +15%, it is configured to 150ms.
Fixes: 8d329309184d ("iio: pressure: bmp280: Add support for BMP380 sensor family")
Signed-off-by: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Link: https://patch.msgid.link/20240711211558.106327-3-vassilisamir@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/pressure/bmp280-core.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c
index cc8553177977..3deaa57bb3f5 100644
--- a/drivers/iio/pressure/bmp280-core.c
+++ b/drivers/iio/pressure/bmp280-core.c
@@ -1581,10 +1581,11 @@ static int bmp380_chip_config(struct bmp280_data *data)
}
/*
* Waits for measurement before checking configuration error
- * flag. Selected longest measure time indicated in
- * section 3.9.1 in the datasheet.
+ * flag. Selected longest measurement time, calculated from
+ * formula in datasheet section 3.9.2 with an offset of ~+15%
+ * as it seen as well in table 3.9.1.
*/
- msleep(80);
+ msleep(150);
/* Check config error flag */
ret = regmap_read(data->regmap, BMP380_REG_ERROR, &tmp);
--
2.46.0
This is a note to let you know that I've just added the patch titled
iio: pressure: bmp280: Fix waiting time for BMP3xx configuration
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-testing branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will be merged to the char-misc-next branch sometime soon,
after it passes testing, and the merge window is open.
If you have any questions about this process, please let me know.
From 262a6634bcc4f0c1c53d13aa89882909f281a6aa Mon Sep 17 00:00:00 2001
From: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Date: Thu, 11 Jul 2024 23:15:50 +0200
Subject: iio: pressure: bmp280: Fix waiting time for BMP3xx configuration
According to the datasheet, both pressure and temperature can go up to
oversampling x32. With this option, the maximum measurement time is not
80ms (this is for press x32 and temp x2), but it is 130ms nominal
(calculated from table 3.9.2) and since most of the maximum values
are around +15%, it is configured to 150ms.
Fixes: 8d329309184d ("iio: pressure: bmp280: Add support for BMP380 sensor family")
Signed-off-by: Vasileios Amoiridis <vassilisamir(a)gmail.com>
Link: https://patch.msgid.link/20240711211558.106327-3-vassilisamir@gmail.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/pressure/bmp280-core.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c
index cc8553177977..3deaa57bb3f5 100644
--- a/drivers/iio/pressure/bmp280-core.c
+++ b/drivers/iio/pressure/bmp280-core.c
@@ -1581,10 +1581,11 @@ static int bmp380_chip_config(struct bmp280_data *data)
}
/*
* Waits for measurement before checking configuration error
- * flag. Selected longest measure time indicated in
- * section 3.9.1 in the datasheet.
+ * flag. Selected longest measurement time, calculated from
+ * formula in datasheet section 3.9.2 with an offset of ~+15%
+ * as it seen as well in table 3.9.1.
*/
- msleep(80);
+ msleep(150);
/* Check config error flag */
ret = regmap_read(data->regmap, BMP380_REG_ERROR, &tmp);
--
2.46.0
Stream endpoint can skip part of TD during next transfer initialization
after beginning stopped during active stream data transfer.
The Set TR Dequeue Pointer command does not clear all internal
transfer-related variables that position stream endpoint on transfer ring.
USB Controller stores all endpoint state information within RsvdO fields
inside endpoint context structure. For stream endpoints, all relevant
information regarding particular StreamID is stored within corresponding
Stream Endpoint context.
Whenever driver wants to stop stream endpoint traffic, it invokes
Stop Endpoint command which forces the controller to dump all endpoint
state-related variables into RsvdO spaces into endpoint context and stream
endpoint context. Whenever driver wants to reinitialize endpoint starting
point on Transfer Ring, it uses the Set TR Dequeue Pointer command
to update dequeue pointer for particular stream in Stream Endpoint
Context. When stream endpoint is forced to stop active transfer in the
middle of TD, it dumps an information about TRB bytes left in RsvdO fields
in Stream Endpoint Context which will be used in next transfer
initialization to designate starting point for XDMA. This field is not
cleared during Set TR Dequeue Pointer command which causes XDMA to skip
over transfer ring and leads to data loss on stream pipe.
Patch fixes this by clearing out all RsvdO fields before initializing new
transfer via that StreamID.
Field Rsvd0 is reserved field, so patch should not have impact for other
xHCI controllers.
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
cc: <stable(a)vger.kernel.org>
Signed-off-by: Pawel Laszczak <pawell(a)cadence.com>
---
Changelog:
v2:
- removed restoring of EDTLA field
drivers/usb/host/xhci-ring.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 1dde53f6eb31..e5e1d665adab 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1386,6 +1386,16 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id,
struct xhci_stream_ctx *ctx =
&ep->stream_info->stream_ctx_array[stream_id];
deq = le64_to_cpu(ctx->stream_ring) & SCTX_DEQ_MASK;
+
+ /*
+ * Existing Cadence xHCI controllers store some endpoint state information
+ * within Rsvd0 fields of Stream Endpoint context. This field is not
+ * cleared during Set TR Dequeue Pointer command which causes XDMA to skip
+ * over transfer ring and leads to data loss on stream pipe.
+ * To fix this issue driver must clear Rsvd0 field.
+ */
+ ctx->reserved[0] = 0;
+ ctx->reserved[1] = 0;
} else {
deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK;
}
--
2.43.0
The patch titled
Subject: ocfs2: fix possible null-ptr-deref in ocfs2_set_buffer_uptodate
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
ocfs2-fix-possible-null-ptr-deref-in-ocfs2_set_buffer_uptodate.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Lizhi Xu <lizhi.xu(a)windriver.com>
Subject: ocfs2: fix possible null-ptr-deref in ocfs2_set_buffer_uptodate
Date: Mon, 2 Sep 2024 10:36:36 +0800
When doing cleanup, if flags without OCFS2_BH_READAHEAD, it may trigger
NULL pointer dereference in the following ocfs2_set_buffer_uptodate() if
bh is NULL.
Link: https://lkml.kernel.org/r/20240902023636.1843422-3-joseph.qi@linux.alibaba.…
Fixes: cf76c78595ca ("ocfs2: don't put and assigning null to bh allocated outside")
Signed-off-by: Lizhi Xu <lizhi.xu(a)windriver.com>
Signed-off-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Reported-by: Heming Zhao <heming.zhao(a)suse.com>
Suggested-by: Heming Zhao <heming.zhao(a)suse.com>
Cc: <stable(a)vger.kernel.org> [4.20+]
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/buffer_head_io.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/fs/ocfs2/buffer_head_io.c~ocfs2-fix-possible-null-ptr-deref-in-ocfs2_set_buffer_uptodate
+++ a/fs/ocfs2/buffer_head_io.c
@@ -388,7 +388,8 @@ read_failure:
/* Always set the buffer in the cache, even if it was
* a forced read, or read-ahead which hasn't yet
* completed. */
- ocfs2_set_buffer_uptodate(ci, bh);
+ if (bh)
+ ocfs2_set_buffer_uptodate(ci, bh);
}
ocfs2_metadata_cache_io_unlock(ci);
_
Patches currently in -mm which might be from lizhi.xu(a)windriver.com are
ocfs2-remove-unreasonable-unlock-in-ocfs2_read_blocks.patch
ocfs2-fix-possible-null-ptr-deref-in-ocfs2_set_buffer_uptodate.patch
The patch titled
Subject: ocfs2: remove unreasonable unlock in ocfs2_read_blocks
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
ocfs2-remove-unreasonable-unlock-in-ocfs2_read_blocks.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Lizhi Xu <lizhi.xu(a)windriver.com>
Subject: ocfs2: remove unreasonable unlock in ocfs2_read_blocks
Date: Mon, 2 Sep 2024 10:36:35 +0800
Patch series "Misc fixes for ocfs2_read_blocks", v5.
This series contains 2 fixes for ocfs2_read_blocks(). The first patch fix
the issue reported by syzbot, which detects bad unlock balance in
ocfs2_read_blocks(). The second patch fixes an issue reported by Heming
Zhao when reviewing above fix.
This patch (of 2):
There was a lock release before exiting, so remove the unreasonable unlock.
Link: https://lkml.kernel.org/r/20240902023636.1843422-1-joseph.qi@linux.alibaba.…
Link: https://lkml.kernel.org/r/20240902023636.1843422-2-joseph.qi@linux.alibaba.…
Fixes: cf76c78595ca ("ocfs2: don't put and assigning null to bh allocated outside")
Signed-off-by: Lizhi Xu <lizhi.xu(a)windriver.com>
Signed-off-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Reviewed-by: Heming Zhao <heming.zhao(a)suse.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Reported-by: syzbot+ab134185af9ef88dfed5(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=ab134185af9ef88dfed5
Tested-by: syzbot+ab134185af9ef88dfed5(a)syzkaller.appspotmail.com
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org> [4.20+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/buffer_head_io.c | 1 -
1 file changed, 1 deletion(-)
--- a/fs/ocfs2/buffer_head_io.c~ocfs2-remove-unreasonable-unlock-in-ocfs2_read_blocks
+++ a/fs/ocfs2/buffer_head_io.c
@@ -235,7 +235,6 @@ int ocfs2_read_blocks(struct ocfs2_cachi
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(sb, block++);
if (bhs[i] == NULL) {
- ocfs2_metadata_cache_io_unlock(ci);
status = -ENOMEM;
mlog_errno(status);
/* Don't forget to put previous bh! */
_
Patches currently in -mm which might be from lizhi.xu(a)windriver.com are
ocfs2-remove-unreasonable-unlock-in-ocfs2_read_blocks.patch
ocfs2-fix-possible-null-ptr-deref-in-ocfs2_set_buffer_uptodate.patch
I get "out of memory" build error on Slackware 15.0 (x86) with kernels
5.15.164, 5.15.165, and 5.15.166-rc1.
#uname -a
Linux aragorn 5.15.166-rc1-smp #1 SMP PREEMPT Mon Sep 2 14:03:00 PDT 2024
i686 AMD Ryzen 9 5900X 12-Core Processor AuthenticAMD GNU/Linux
The attached configuration I use is the standard Slackware 15.0:
config-huge-smp-5.15.161-smp
Here is some of the error log for the 5.15.166-rc1 build:
...
LD [M] drivers/mtd/tests/mtd_stresstest.o
LD [M] drivers/pcmcia/pcmcia_core.o
LD [M] drivers/mtd/tests/mtd_subpagetest.o
cc1: out of memory allocating 180705472 bytes after a total of 283914240
bytes
LD [M] drivers/mtd/tests/mtd_torturetest.o
CC [M] drivers/mtd/ubi/wl.o
LD [M] drivers/pcmcia/pcmcia.o
CC [M] drivers/gpu/drm/nouveau/nvkm/engine/disp/headgv100.o
CC [M] drivers/gpu/drm/amd/amdgpu/../display/dc/dce/dmub_hw_lock_mgr.o
LD [M] drivers/mtd/tests/mtd_nandbiterrs.o
CC [M] drivers/mtd/ubi/attach.o
LD [M] drivers/staging/qlge/qlge.o
make[4]: *** [scripts/Makefile.build:289:
drivers/staging/media/atomisp/pci/isp/kernels/ynr/ynr_1.0/ia_css_ynr.host.o]
Er
ror 1
make[3]: *** [scripts/Makefile.build:552: drivers/staging/media/atomisp]
Error 2
make[2]: *** [scripts/Makefile.build:552: drivers/staging/media] Error 2
make[2]: *** Waiting for unfinished jobs....
LD [M] drivers/pcmcia/pcmcia_rsrc.o
CC [M] drivers/gpu/drm/amd/amdgpu/../display/dc/dce/dmub_outbox.o
make[1]: *** [scripts/Makefile.build:552: drivers/staging] Error 2
make[1]: *** Waiting for unfinished jobs....
...
I have a work around...
When I unpatch these 6 minmax patches taken from kernel-5.15.164
then the errors are fixed!
minmax: allow comparisons of 'int' against 'unsigned char/short'
minmax: allow min()/max()/clamp() if the arguments have the same
minmax: clamp more efficiently by avoiding extra comparison
minmax: fix header inclusions
minmax: relax check to allow comparison between unsigned arguments
minmax: sanity check constant bounds when clamping
Can the minmax patches be removed or fixed?
The patch titled
Subject: ocfs2: fix null-ptr-deref when journal load failed.
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
ocfs2-fix-null-ptr-deref-when-journal-load-failed.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Julian Sun <sunjunchao2870(a)gmail.com>
Subject: ocfs2: fix null-ptr-deref when journal load failed.
Date: Mon, 2 Sep 2024 11:08:44 +0800
During the mounting process, if journal_reset() fails because of too short
journal, then lead to jbd2_journal_load() fails with NULL j_sb_buffer.
Subsequently, ocfs2_journal_shutdown() calls
jbd2_journal_flush()->jbd2_cleanup_journal_tail()->
__jbd2_update_log_tail()->jbd2_journal_update_sb_log_tail()
->lock_buffer(journal->j_sb_buffer), resulting in a null-pointer
dereference error.
To resolve this issue, we should check the JBD2_LOADED flag to ensure the
journal was properly loaded. Additionally, use journal instead of
osb->journal directly to simplify the code.
Link: https://syzkaller.appspot.com/bug?extid=05b9b39d8bdfe1a0861f
Link: https://lkml.kernel.org/r/20240902030844.422725-1-sunjunchao2870@gmail.com
Fixes: f6f50e28f0cb ("jbd2: Fail to load a journal if it is too short")
Signed-off-by: Julian Sun <sunjunchao2870(a)gmail.com>
Reported-by: syzbot+05b9b39d8bdfe1a0861f(a)syzkaller.appspotmail.com
Suggested-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/journal.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
--- a/fs/ocfs2/journal.c~ocfs2-fix-null-ptr-deref-when-journal-load-failed
+++ a/fs/ocfs2/journal.c
@@ -1055,7 +1055,7 @@ void ocfs2_journal_shutdown(struct ocfs2
if (!igrab(inode))
BUG();
- num_running_trans = atomic_read(&(osb->journal->j_num_trans));
+ num_running_trans = atomic_read(&(journal->j_num_trans));
trace_ocfs2_journal_shutdown(num_running_trans);
/* Do a commit_cache here. It will flush our journal, *and*
@@ -1074,9 +1074,10 @@ void ocfs2_journal_shutdown(struct ocfs2
osb->commit_task = NULL;
}
- BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
+ BUG_ON(atomic_read(&(journal->j_num_trans)) != 0);
- if (ocfs2_mount_local(osb)) {
+ if (ocfs2_mount_local(osb) &&
+ (journal->j_journal->j_flags & JBD2_LOADED)) {
jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal, 0);
jbd2_journal_unlock_updates(journal->j_journal);
_
Patches currently in -mm which might be from sunjunchao2870(a)gmail.com are
ocfs2-fix-null-ptr-deref-when-journal-load-failed.patch
This is something that I've been thinking about for a while. We had a
discussion at LPC 2020 about this[1] but the proposals suggested there
never materialised.
In short, it is quite difficult for userspace to detect the feature
capability of syscalls at runtime. This is something a lot of programs
want to do, but they are forced to create elaborate scenarios to try to
figure out if a feature is supported without causing damage to the
system. For the vast majority of cases, each individual feature also
needs to be tested individually (because syscall results are
all-or-nothing), so testing even a single syscall's feature set can
easily inflate the startup time of programs.
This patchset implements the fairly minimal design I proposed in this
talk[2] and in some old LKML threads (though I can't find the exact
references ATM). The general flow looks like:
1. Userspace will indicate to the kernel that a syscall should a be
no-op by setting the top bit of the extensible struct size argument.
We will almost certainly never support exabyte sized structs, so the
top bits are free for us to use as makeshift flag bits. This is
preferable to using the per-syscall flag field inside the structure
because seccomp can easily detect the bit in the flag and allow the
probe or forcefully return -EEXTSYS_NOOP.
2. The kernel will then fill the provided structure with every valid
bit pattern that the current kernel understands.
For flags or other bitflag-like fields, this is the set of valid
flags or bits. For pointer fields or fields that take an arbitrary
value, the field has every bit set (0xFF... to fill the field) to
indicate that any value is valid in the field.
3. The syscall then returns -EEXTSYS_NOOP which is an errno that will
only ever be used for this purpose (so userspace can be sure that
the request succeeded).
On older kernels, the syscall will return a different error (usually
-E2BIG or -EFAULT) and userspace can do their old-fashioned checks.
4. Userspace can then check which flags and fields are supported by
looking at the fields in the returned structure. Flags are checked
by doing an AND with the flags field, and field support can checked
by comparing to 0. In principle you could just AND the entire
structure if you wanted to do this check generically without caring
about the structure contents (this is what libraries might consider
doing).
Userspace can even find out the internal kernel structure size by
passing a PAGE_SIZE buffer and seeing how many bytes are non-zero.
As with copy_struct_from_user(), this is designed to be forward- and
backwards- compatible.
This allows programas to get a one-shot understanding of what features a
syscall supports without having to do any elaborate setups or tricks to
detect support for destructive features. Flags can simply be ANDed to
check if they are in the supported set, and fields can just be checked
to see if they are non-zero.
This patchset is IMHO the simplest way we can add the ability to
introspect the feature set of extensible struct (copy_struct_from_user)
syscalls. It doesn't preclude the chance of a more generic mechanism
being added later.
The intended way of using this interface to get feature information
looks something like the following (imagine that openat2 has gained a
new field and a new flag in the future):
static bool openat2_no_automount_supported;
static bool openat2_cwd_fd_supported;
int check_openat2_support(void)
{
int err;
struct open_how how = {};
err = openat2(AT_FDCWD, ".", &how, CHECK_FIELDS | sizeof(how));
assert(err < 0);
switch (errno) {
case EFAULT: case E2BIG:
/* Old kernel... */
check_support_the_old_way();
break;
case EEXTSYS_NOOP:
openat2_no_automount_supported = (how.flags & RESOLVE_NO_AUTOMOUNT);
openat2_cwd_fd_supported = (how.cwd_fd != 0);
break;
}
}
[1]: https://lwn.net/Articles/830666/
[2]: https://youtu.be/ggD-eb3yPVs
Signed-off-by: Aleksa Sarai <cyphar(a)cyphar.com>
---
Aleksa Sarai (8):
uaccess: add copy_struct_to_user helper
sched_getattr: port to copy_struct_to_user
openat2: explicitly return -E2BIG for (usize > PAGE_SIZE)
openat2: add CHECK_FIELDS flag to usize argument
clone3: add CHECK_FIELDS flag to usize argument
selftests: openat2: add 0xFF poisoned data after misaligned struct
selftests: openat2: add CHECK_FIELDS selftests
selftests: clone3: add CHECK_FIELDS selftests
fs/open.c | 17 ++
include/linux/uaccess.h | 98 +++++++++
include/uapi/asm-generic/errno.h | 3 +
include/uapi/linux/openat2.h | 2 +
kernel/fork.c | 33 ++-
kernel/sched/syscalls.c | 42 +---
tools/testing/selftests/clone3/.gitignore | 1 +
tools/testing/selftests/clone3/Makefile | 2 +-
.../testing/selftests/clone3/clone3_check_fields.c | 229 +++++++++++++++++++++
tools/testing/selftests/openat2/openat2_test.c | 126 +++++++++++-
10 files changed, 504 insertions(+), 49 deletions(-)
---
base-commit: 431c1646e1f86b949fa3685efc50b660a364c2b6
change-id: 20240803-extensible-structs-check_fields-a47e94cef691
Best regards,
--
Aleksa Sarai <cyphar(a)cyphar.com>
The DPI display interface feeds the external display pipeline. However
the pipeline representation is currently incomplete. Efforts are still
under way to come up with a way to represent the "creative" repurposing
of the DP bridge chip's internal output mux, which is meant to support
USB type-C orientation changes, to output to one of two type-C ports.
Until that is finalized, the external display can't be fully described,
and thus won't work. Even worse, the half complete graph potentially
confuses the OS, breaking the internal display as well.
Disable the external display interface across the whole Corsola family
until the DP / USB Type-C muxing graph binding is ready.
Reported-by: Alper Nebi Yasak <alpernebiyasak(a)gmail.com>
Closes: https://lore.kernel.org/linux-mediatek/38a703a9-6efb-456a-a248-1dd3687e526d…
Fixes: 8855d01fb81f ("arm64: dts: mediatek: Add MT8186 Krabby platform based Tentacruel / Tentacool")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Chen-Yu Tsai <wenst(a)chromium.org>
---
Stephen has recently posted the "platform/chrome: Add DT USB/DP
muxing/topology support" patch series, which is now up to v3 [1].
More work based on this series is needed for the DP bridge drivers.
[1] https://lore.kernel.org/dri-devel/20240819223834.2049862-1-swboyd@chromium.…
---
arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi b/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi
index 0c4a26117428..682c6ad2574d 100644
--- a/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8186-corsola.dtsi
@@ -353,7 +353,8 @@ &dpi {
pinctrl-names = "default", "sleep";
pinctrl-0 = <&dpi_pins_default>;
pinctrl-1 = <&dpi_pins_sleep>;
- status = "okay";
+ /* TODO Re-enable after DP to Type-C port muxing can be described */
+ status = "disabled";
};
&dpi_out {
--
2.46.0.184.g6999bdac58-goog
From: "yenchia.chen" <yenchia.chen(a)mediatek.com>
We have met a deadlock issue on our device when resuming.
After applying this patch which is picked from mainline, issue solved.
We'd like to backport to 5.15.y and could you help to review? thanks.
[ Upstream commit 3e999770ac1c7c31a70685dd5b88e89473509e9c ]
Rafael J. Wysocki (1):
PM: sleep: Restore asynchronous device resume optimization
drivers/base/power/main.c | 117 +++++++++++++++++++++-----------------
include/linux/pm.h | 1 +
2 files changed, 65 insertions(+), 53 deletions(-)
--
2.18.0
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: 3d5c2f8e75a55cfb11a85086c71996af0354a1fb
Gitweb: https://git.kernel.org/tip/3d5c2f8e75a55cfb11a85086c71996af0354a1fb
Author: Jacky Bai <ping.bai(a)nxp.com>
AuthorDate: Thu, 25 Jul 2024 15:33:55 -04:00
Committer: Daniel Lezcano <daniel.lezcano(a)linaro.org>
CommitterDate: Mon, 02 Sep 2024 10:04:15 +02:00
clocksource/drivers/imx-tpm: Fix next event not taking effect sometime
The value written into the TPM CnV can only be updated into the hardware
when the counter increases. Additional writes to the CnV write buffer are
ignored until the register has been updated. Therefore, we need to check
if the CnV has been updated before continuing. This may require waiting for
1 counter cycle in the worst case.
Cc: stable(a)vger.kernel.org
Fixes: 059ab7b82eec ("clocksource/drivers/imx-tpm: Add imx tpm timer support")
Signed-off-by: Jacky Bai <ping.bai(a)nxp.com>
Reviewed-by: Peng Fan <peng.fan(a)nxp.com>
Reviewed-by: Ye Li <ye.li(a)nxp.com>
Reviewed-by: Jason Liu <jason.hui.liu(a)nxp.com>
Signed-off-by: Frank Li <Frank.Li(a)nxp.com>
Link: https://lore.kernel.org/r/20240725193355.1436005-2-Frank.Li@nxp.com
Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org>
---
drivers/clocksource/timer-imx-tpm.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index cd23caf..92c025b 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -91,6 +91,14 @@ static int tpm_set_next_event(unsigned long delta,
now = tpm_read_counter();
/*
+ * Need to wait CNT increase at least 1 cycle to make sure
+ * the C0V has been updated into HW.
+ */
+ if ((next & 0xffffffff) != readl(timer_base + TPM_C0V))
+ while (now == tpm_read_counter())
+ ;
+
+ /*
* NOTE: We observed in a very small probability, the bus fabric
* contention between GPU and A7 may results a few cycles delay
* of writing CNT registers which may cause the min_delta event got
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: 5b8843fcd49827813da80c0f590a17ae4ce93c5d
Gitweb: https://git.kernel.org/tip/5b8843fcd49827813da80c0f590a17ae4ce93c5d
Author: Jacky Bai <ping.bai(a)nxp.com>
AuthorDate: Thu, 25 Jul 2024 15:33:54 -04:00
Committer: Daniel Lezcano <daniel.lezcano(a)linaro.org>
CommitterDate: Mon, 02 Sep 2024 10:04:15 +02:00
clocksource/drivers/imx-tpm: Fix return -ETIME when delta exceeds INT_MAX
In tpm_set_next_event(delta), return -ETIME by wrong cast to int when delta
is larger than INT_MAX.
For example:
tpm_set_next_event(delta = 0xffff_fffe)
{
...
next = tpm_read_counter(); // assume next is 0x10
next += delta; // next will 0xffff_fffe + 0x10 = 0x1_0000_000e
now = tpm_read_counter(); // now is 0x10
...
return (int)(next - now) <= 0 ? -ETIME : 0;
^^^^^^^^^^
0x1_0000_000e - 0x10 = 0xffff_fffe, which is -2 when
cast to int. So return -ETIME.
}
To fix this, introduce a 'prev' variable and check if 'now - prev' is
larger than delta.
Cc: stable(a)vger.kernel.org
Fixes: 059ab7b82eec ("clocksource/drivers/imx-tpm: Add imx tpm timer support")
Signed-off-by: Jacky Bai <ping.bai(a)nxp.com>
Reviewed-by: Peng Fan <peng.fan(a)nxp.com>
Reviewed-by: Ye Li <ye.li(a)nxp.com>
Reviewed-by: Jason Liu <jason.hui.liu(a)nxp.com>
Signed-off-by: Frank Li <Frank.Li(a)nxp.com>
Link: https://lore.kernel.org/r/20240725193355.1436005-1-Frank.Li@nxp.com
Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org>
---
drivers/clocksource/timer-imx-tpm.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index bd64a8a..cd23caf 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -83,10 +83,10 @@ static u64 notrace tpm_read_sched_clock(void)
static int tpm_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- unsigned long next, now;
+ unsigned long next, prev, now;
- next = tpm_read_counter();
- next += delta;
+ prev = tpm_read_counter();
+ next = prev + delta;
writel(next, timer_base + TPM_C0V);
now = tpm_read_counter();
@@ -96,7 +96,7 @@ static int tpm_set_next_event(unsigned long delta,
* of writing CNT registers which may cause the min_delta event got
* missed, so we need add a ETIME check here in case it happened.
*/
- return (int)(next - now) <= 0 ? -ETIME : 0;
+ return (now - prev) >= delta ? -ETIME : 0;
}
static int tpm_set_state_oneshot(struct clock_event_device *evt)
We have met a deadlock issue on our device which use 5.15.y when resuming.
After applying this patch which is picked from mainline, issue solved.
Backport to 6.1.y also.
Rafael J. Wysocki (1):
PM: sleep: Restore asynchronous device resume optimization
drivers/base/power/main.c | 117 +++++++++++++++++++++-----------------
include/linux/pm.h | 1 +
2 files changed, 65 insertions(+), 53 deletions(-)
--
2.18.0
On X1E80100, GPIO interrupts for wakeup-capable pins have been broken since
the introduction of the pinctrl driver. This prevents keyboard and touchpad
from working on most of the X1E laptops. So far we have worked around this
by manually building a kernel with the "wakeup-parent" removed from the
pinctrl node in the device tree, but we cannot expect all users to do that.
Implement a similar workaround in the driver by clearing the wakeirq_map
for X1E80100. This avoids using the PDC wakeup parent for all GPIOs
and handles the interrupts directly in the pinctrl driver instead.
The PDC driver needs additional changes to support X1E80100 properly.
Adding a workaround separately first allows to land the necessary PDC
changes through the normal release cycle, while still solving the more
critical problem with keyboard and touchpad on the current stable kernel
versions. Bypassing the PDC is enough for now, because we have not yet
enabled the deep idle states where using the PDC becomes necessary.
Cc: stable(a)vger.kernel.org
Fixes: 05e4941d97ef ("pinctrl: qcom: Add X1E80100 pinctrl driver")
Signed-off-by: Stephan Gerhold <stephan.gerhold(a)linaro.org>
---
Commenting out .wakeirq_map as well would give an unused declaration
warning for x1e80100_pdc_map. The map itself is correct, so I just "clear"
it by setting .nwakeirq_map to 0 for now. It's just temporary - this patch
will be reverted after we add X1E80100 support to the PDC driver.
---
drivers/pinctrl/qcom/pinctrl-x1e80100.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/pinctrl/qcom/pinctrl-x1e80100.c b/drivers/pinctrl/qcom/pinctrl-x1e80100.c
index 65ed933f05ce..abfcdd3da9e8 100644
--- a/drivers/pinctrl/qcom/pinctrl-x1e80100.c
+++ b/drivers/pinctrl/qcom/pinctrl-x1e80100.c
@@ -1839,7 +1839,9 @@ static const struct msm_pinctrl_soc_data x1e80100_pinctrl = {
.ngroups = ARRAY_SIZE(x1e80100_groups),
.ngpios = 239,
.wakeirq_map = x1e80100_pdc_map,
- .nwakeirq_map = ARRAY_SIZE(x1e80100_pdc_map),
+ /* TODO: Enabling PDC currently breaks GPIO interrupts */
+ .nwakeirq_map = 0,
+ /* .nwakeirq_map = ARRAY_SIZE(x1e80100_pdc_map), */
.egpio_func = 9,
};
---
base-commit: 128f71fe014fc91efa1407ce549f94a9a9f1072c
change-id: 20240830-x1e80100-bypass-pdc-39a8c1ae594f
Best regards,
--
Stephan Gerhold <stephan.gerhold(a)linaro.org>
From: Long Li <longli(a)microsoft.com>
MANA hardware uses 4k page size. When calculating the page table index,
it should use the hardware page size, not the system page size.
Cc: stable(a)vger.kernel.org
Fixes: 0266a177631d ("RDMA/mana_ib: Add a driver for Microsoft Azure Network Adapter")
Signed-off-by: Long Li <longli(a)microsoft.com>
---
change log
v2: replaced net with rdma-next tag in patch title and simplified the title
drivers/infiniband/hw/mana/main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index d13abc954d2a..f68f54aea820 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -383,7 +383,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem
create_req->length = umem->length;
create_req->offset_in_page = ib_umem_dma_offset(umem, page_sz);
- create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
+ create_req->gdma_page_type = order_base_2(page_sz) - MANA_PAGE_SHIFT;
create_req->page_count = num_pages_total;
ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",
--
2.17.1
In parport_attach, the return value of ida_alloc is unchecked, witch leads
to the use of an invalid index value.
To address this issue, index should be checked. When the index value is
abnormal, the device should be freed.
Found by code review, compile tested only.
Cc: stable(a)vger.kernel.org
Fixes: fb56d97df70e ("pps: client: use new parport device model")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v3:
- modified Fixes tag as suggestions.
Changes in v2:
- removed error output as suggestions.
---
drivers/pps/clients/pps_parport.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/pps/clients/pps_parport.c b/drivers/pps/clients/pps_parport.c
index 63d03a0df5cc..abaffb4e1c1c 100644
--- a/drivers/pps/clients/pps_parport.c
+++ b/drivers/pps/clients/pps_parport.c
@@ -149,6 +149,9 @@ static void parport_attach(struct parport *port)
}
index = ida_alloc(&pps_client_index, GFP_KERNEL);
+ if (index < 0)
+ goto err_free_device;
+
memset(&pps_client_cb, 0, sizeof(pps_client_cb));
pps_client_cb.private = device;
pps_client_cb.irq_func = parport_irq;
@@ -159,7 +162,7 @@ static void parport_attach(struct parport *port)
index);
if (!device->pardev) {
pr_err("couldn't register with %s\n", port->name);
- goto err_free;
+ goto err_free_ida;
}
if (parport_claim_or_block(device->pardev) < 0) {
@@ -187,8 +190,9 @@ static void parport_attach(struct parport *port)
parport_release(device->pardev);
err_unregister_dev:
parport_unregister_device(device->pardev);
-err_free:
+err_free_ida:
ida_free(&pps_client_index, index);
+err_free_device:
kfree(device);
}
--
2.25.1
Dave forgot to mark the original patch for stable, so after consulting with Dave, here it comes
@Greg: you might want to add the patch to all versions that received 14dd46cf31f4 ("xfs: split xfs_inobt_init_cursor")
(which I think is v6.9 and v6.10)
/Anders
-------- Forwarded Message --------
Subject: [PATCH 3/9] xfs: xfs_finobt_count_blocks() walks the wrong btree
Date: Thu, 22 Aug 2024 16:59:33 -0700
From: Darrick J. Wong <djwong(a)kernel.org>
To: djwong(a)kernel.org
CC: Anders Blomdell <anders.blomdell(a)gmail.com>, Dave Chinner <dchinner(a)redhat.com>, Christoph Hellwig <hch(a)lst.de>, hch(a)lst.de, linux-xfs(a)vger.kernel.org
From: Dave Chinner <dchinner(a)redhat.com>
As a result of the factoring in commit 14dd46cf31f4 ("xfs: split
xfs_inobt_init_cursor"), mount started taking a long time on a
user's filesystem. For Anders, this made mount times regress from
under a second to over 15 minutes for a filesystem with only 30
million inodes in it.
Anders bisected it down to the above commit, but even then the bug
was not obvious. In this commit, over 20 calls to
xfs_inobt_init_cursor() were modified, and some we modified to call
a new function named xfs_finobt_init_cursor().
If that takes you a moment to reread those function names to see
what the rename was, then you have realised why this bug wasn't
spotted during review. And it wasn't spotted on inspection even
after the bisect pointed at this commit - a single missing "f" isn't
the easiest thing for a human eye to notice....
The result is that xfs_finobt_count_blocks() now incorrectly calls
xfs_inobt_init_cursor() so it is now walking the inobt instead of
the finobt. Hence when there are lots of allocated inodes in a
filesystem, mount takes a -long- time run because it now walks a
massive allocated inode btrees instead of the small, nearly empty
free inode btrees. It also means all the finobt space reservations
are wrong, so mount could potentially given ENOSPC on kernel
upgrade.
In hindsight, commit 14dd46cf31f4 should have been two commits - the
first to convert the finobt callers to the new API, the second to
modify the xfs_inobt_init_cursor() API for the inobt callers. That
would have made the bug very obvious during review.
Fixes: 14dd46cf31f4 ("xfs: split xfs_inobt_init_cursor")
Reported-by: Anders Blomdell <anders.blomdell(a)gmail.com>
Signed-off-by: Dave Chinner <dchinner(a)redhat.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Darrick J. Wong <djwong(a)kernel.org>
---
fs/xfs/libxfs/xfs_ialloc_btree.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 496e2f72a85b9..797d5b5f7b725 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -749,7 +749,7 @@ xfs_finobt_count_blocks(
if (error)
return error;
- cur = xfs_inobt_init_cursor(pag, tp, agbp);
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
error = xfs_btree_count_blocks(cur, tree_blocks);
xfs_btree_del_cursor(cur, error);
xfs_trans_brelse(tp, agbp);
The quilt patch titled
Subject: scripts/gdb: fix lx-mounts command error
has been removed from the -mm tree. Its filename was
scripts-gdb-fix-lx-mounts-command-error.patch
This patch was dropped because it was merged into the mm-nonmm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Subject: scripts/gdb: fix lx-mounts command error
Date: Tue, 23 Jul 2024 14:48:59 +0800
(gdb) lx-mounts
mount super_block devname pathname fstype options
Python Exception <class 'gdb.error'>: There is no member named list.
Error occurred in Python: There is no member named list.
We encounter the above issue after commit 2eea9ce4310d ("mounts: keep
list of mounts in an rbtree"). The commit move a mount from list into
rbtree.
So we can instead use rbtree to iterate all mounts information.
Link: https://lkml.kernel.org/r/20240723064902.124154-4-kuan-ying.lee@canonical.c…
Fixes: 2eea9ce4310d ("mounts: keep list of mounts in an rbtree")
Signed-off-by: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/gdb/linux/proc.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/scripts/gdb/linux/proc.py~scripts-gdb-fix-lx-mounts-command-error
+++ a/scripts/gdb/linux/proc.py
@@ -18,6 +18,7 @@ from linux import utils
from linux import tasks
from linux import lists
from linux import vfs
+from linux import rbtree
from struct import *
@@ -172,8 +173,7 @@ values of that process namespace"""
gdb.write("{:^18} {:^15} {:>9} {} {} options\n".format(
"mount", "super_block", "devname", "pathname", "fstype"))
- for mnt in lists.list_for_each_entry(namespace['list'],
- mount_ptr_type, "mnt_list"):
+ for mnt in rbtree.rb_inorder_for_each_entry(namespace['mounts'], mount_ptr_type, "mnt_node"):
devname = mnt['mnt_devname'].string()
devname = devname if devname else "none"
_
Patches currently in -mm which might be from kuan-ying.lee(a)canonical.com are
The quilt patch titled
Subject: scripts/gdb: add iteration function for rbtree
has been removed from the -mm tree. Its filename was
scripts-gdb-add-iteration-function-for-rbtree.patch
This patch was dropped because it was merged into the mm-nonmm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Subject: scripts/gdb: add iteration function for rbtree
Date: Tue, 23 Jul 2024 14:48:58 +0800
Add inorder iteration function for rbtree usage.
This is a preparation patch for the next patch to fix the gdb mounts
issue.
Link: https://lkml.kernel.org/r/20240723064902.124154-3-kuan-ying.lee@canonical.c…
Fixes: 2eea9ce4310d ("mounts: keep list of mounts in an rbtree")
Signed-off-by: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/gdb/linux/rbtree.py | 12 ++++++++++++
1 file changed, 12 insertions(+)
--- a/scripts/gdb/linux/rbtree.py~scripts-gdb-add-iteration-function-for-rbtree
+++ a/scripts/gdb/linux/rbtree.py
@@ -9,6 +9,18 @@ from linux import utils
rb_root_type = utils.CachedType("struct rb_root")
rb_node_type = utils.CachedType("struct rb_node")
+def rb_inorder_for_each(root):
+ def inorder(node):
+ if node:
+ yield from inorder(node['rb_left'])
+ yield node
+ yield from inorder(node['rb_right'])
+
+ yield from inorder(root['rb_node'])
+
+def rb_inorder_for_each_entry(root, gdbtype, member):
+ for node in rb_inorder_for_each(root):
+ yield utils.container_of(node, gdbtype, member)
def rb_first(root):
if root.type == rb_root_type.get_type():
_
Patches currently in -mm which might be from kuan-ying.lee(a)canonical.com are
The quilt patch titled
Subject: scripts/gdb: fix timerlist parsing issue
has been removed from the -mm tree. Its filename was
scripts-gdb-fix-timerlist-parsing-issue.patch
This patch was dropped because it was merged into the mm-nonmm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Subject: scripts/gdb: fix timerlist parsing issue
Date: Tue, 23 Jul 2024 14:48:57 +0800
Patch series "Fix some GDB command error and add some GDB commands", v3.
Fix some GDB command errors and add some useful GDB commands.
This patch (of 5):
Commit 7988e5ae2be7 ("tick: Split nohz and highres features from
nohz_mode") and commit 7988e5ae2be7 ("tick: Split nohz and highres
features from nohz_mode") move 'tick_stopped' and 'nohz_mode' to flags
field which will break the gdb lx-mounts command:
(gdb) lx-timerlist
Python Exception <class 'gdb.error'>: There is no member named nohz_mode.
Error occurred in Python: There is no member named nohz_mode.
(gdb) lx-timerlist
Python Exception <class 'gdb.error'>: There is no member named tick_stopped.
Error occurred in Python: There is no member named tick_stopped.
We move 'tick_stopped' and 'nohz_mode' to flags field instead.
Link: https://lkml.kernel.org/r/20240723064902.124154-1-kuan-ying.lee@canonical.c…
Link: https://lkml.kernel.org/r/20240723064902.124154-2-kuan-ying.lee@canonical.c…
Fixes: a478ffb2ae23 ("tick: Move individual bit features to debuggable mask accesses")
Fixes: 7988e5ae2be7 ("tick: Split nohz and highres features from nohz_mode")
Signed-off-by: Kuan-Ying Lee <kuan-ying.lee(a)canonical.com>
Cc: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/gdb/linux/timerlist.py | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
--- a/scripts/gdb/linux/timerlist.py~scripts-gdb-fix-timerlist-parsing-issue
+++ a/scripts/gdb/linux/timerlist.py
@@ -87,21 +87,22 @@ def print_cpu(hrtimer_bases, cpu, max_cl
text += "\n"
if constants.LX_CONFIG_TICK_ONESHOT:
- fmts = [(" .{} : {}", 'nohz_mode'),
- (" .{} : {} nsecs", 'last_tick'),
- (" .{} : {}", 'tick_stopped'),
- (" .{} : {}", 'idle_jiffies'),
- (" .{} : {}", 'idle_calls'),
- (" .{} : {}", 'idle_sleeps'),
- (" .{} : {} nsecs", 'idle_entrytime'),
- (" .{} : {} nsecs", 'idle_waketime'),
- (" .{} : {} nsecs", 'idle_exittime'),
- (" .{} : {} nsecs", 'idle_sleeptime'),
- (" .{}: {} nsecs", 'iowait_sleeptime'),
- (" .{} : {}", 'last_jiffies'),
- (" .{} : {}", 'next_timer'),
- (" .{} : {} nsecs", 'idle_expires')]
- text += "\n".join([s.format(f, ts[f]) for s, f in fmts])
+ TS_FLAG_STOPPED = 1 << 1
+ TS_FLAG_NOHZ = 1 << 4
+ text += f" .{'nohz':15s}: {int(bool(ts['flags'] & TS_FLAG_NOHZ))}\n"
+ text += f" .{'last_tick':15s}: {ts['last_tick']}\n"
+ text += f" .{'tick_stopped':15s}: {int(bool(ts['flags'] & TS_FLAG_STOPPED))}\n"
+ text += f" .{'idle_jiffies':15s}: {ts['idle_jiffies']}\n"
+ text += f" .{'idle_calls':15s}: {ts['idle_calls']}\n"
+ text += f" .{'idle_sleeps':15s}: {ts['idle_sleeps']}\n"
+ text += f" .{'idle_entrytime':15s}: {ts['idle_entrytime']} nsecs\n"
+ text += f" .{'idle_waketime':15s}: {ts['idle_waketime']} nsecs\n"
+ text += f" .{'idle_exittime':15s}: {ts['idle_exittime']} nsecs\n"
+ text += f" .{'idle_sleeptime':15s}: {ts['idle_sleeptime']} nsecs\n"
+ text += f" .{'iowait_sleeptime':15s}: {ts['iowait_sleeptime']} nsecs\n"
+ text += f" .{'last_jiffies':15s}: {ts['last_jiffies']}\n"
+ text += f" .{'next_timer':15s}: {ts['next_timer']}\n"
+ text += f" .{'idle_expires':15s}: {ts['idle_expires']} nsecs\n"
text += "\njiffies: {}\n".format(jiffies)
text += "\n"
_
Patches currently in -mm which might be from kuan-ying.lee(a)canonical.com are
The quilt patch titled
Subject: ocfs2: fix the la space leak when unmounting an ocfs2 volume
has been removed from the -mm tree. Its filename was
ocfs2-fix-the-la-space-leak-when-unmounting-an-ocfs2-volume.patch
This patch was dropped because it was merged into the mm-nonmm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Heming Zhao <heming.zhao(a)suse.com>
Subject: ocfs2: fix the la space leak when unmounting an ocfs2 volume
Date: Fri, 19 Jul 2024 19:43:10 +0800
This bug has existed since the initial OCFS2 code. The code logic in
ocfs2_sync_local_to_main() is wrong, as it ignores the last contiguous
free bits, which causes an OCFS2 volume to lose the last free clusters of
LA window on each umount command.
Link: https://lkml.kernel.org/r/20240719114310.14245-1-heming.zhao@suse.com
Signed-off-by: Heming Zhao <heming.zhao(a)suse.com>
Reviewed-by: Su Yue <glass.su(a)suse.com>
Reviewed-by: Joseph Qi <joseph.qi(a)linux.alibaba.com>
Cc: Mark Fasheh <mark(a)fasheh.com>
Cc: Joel Becker <jlbec(a)evilplan.org>
Cc: Junxiao Bi <junxiao.bi(a)oracle.com>
Cc: Changwei Ge <gechangwei(a)live.cn>
Cc: Gang He <ghe(a)suse.com>
Cc: Jun Piao <piaojun(a)huawei.com>
Cc: Heming Zhao <heming.zhao(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/ocfs2/localalloc.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
--- a/fs/ocfs2/localalloc.c~ocfs2-fix-the-la-space-leak-when-unmounting-an-ocfs2-volume
+++ a/fs/ocfs2/localalloc.c
@@ -1002,6 +1002,25 @@ static int ocfs2_sync_local_to_main(stru
start = bit_off + 1;
}
+ /* clear the contiguous bits until the end boundary */
+ if (count) {
+ blkno = la_start_blk +
+ ocfs2_clusters_to_blocks(osb->sb,
+ start - count);
+
+ trace_ocfs2_sync_local_to_main_free(
+ count, start - count,
+ (unsigned long long)la_start_blk,
+ (unsigned long long)blkno);
+
+ status = ocfs2_release_clusters(handle,
+ main_bm_inode,
+ main_bm_bh, blkno,
+ count);
+ if (status < 0)
+ mlog_errno(status);
+ }
+
bail:
if (status)
mlog_errno(status);
_
Patches currently in -mm which might be from heming.zhao(a)suse.com are
The quilt patch titled
Subject: mm/hugetlb_vmemmap: batch HVO work when demoting
has been removed from the -mm tree. Its filename was
mm-hugetlb_vmemmap-batch-hvo-work-when-demoting.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Yu Zhao <yuzhao(a)google.com>
Subject: mm/hugetlb_vmemmap: batch HVO work when demoting
Date: Mon, 12 Aug 2024 16:48:23 -0600
Batch the HVO work, including de-HVO of the source and HVO of the
destination hugeTLB folios, to speed up demotion.
After commit bd225530a4c7 ("mm/hugetlb_vmemmap: fix race with speculative
PFN walkers"), each request of HVO or de-HVO, batched or not, invokes
synchronize_rcu() once. For example, when not batched, demoting one 1GB
hugeTLB folio to 512 2MB hugeTLB folios invokes synchronize_rcu() 513
times (1 de-HVO plus 512 HVO requests), whereas when batched, only twice
(1 de-HVO plus 1 HVO request). And the performance difference between the
two cases is significant, e.g.,
echo 2048kB >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote_size
time echo 100 >/sys/kernel/mm/hugepages/hugepages-1048576kB/demote
Before this patch:
real 8m58.158s
user 0m0.009s
sys 0m5.900s
After this patch:
real 0m0.900s
user 0m0.000s
sys 0m0.851s
Note that this patch changes the behavior of the `demote` interface when
de-HVO fails. Before, the interface aborts immediately upon failure; now,
it tries to finish an entire batch, meaning it can make extra progress if
the rest of the batch contains folios that do not need to de-HVO.
Link: https://lkml.kernel.org/r/20240812224823.3914837-1-yuzhao@google.com
Fixes: bd225530a4c7 ("mm/hugetlb_vmemmap: fix race with speculative PFN walkers")
Signed-off-by: Yu Zhao <yuzhao(a)google.com>
Reviewed-by: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 156 ++++++++++++++++++++++++++++---------------------
1 file changed, 92 insertions(+), 64 deletions(-)
--- a/mm/hugetlb.c~mm-hugetlb_vmemmap-batch-hvo-work-when-demoting
+++ a/mm/hugetlb.c
@@ -3921,101 +3921,125 @@ out:
return 0;
}
-static int demote_free_hugetlb_folio(struct hstate *h, struct folio *folio)
+static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
+ struct list_head *src_list)
{
- int i, nid = folio_nid(folio);
- struct hstate *target_hstate;
- struct page *subpage;
- struct folio *inner_folio;
- int rc = 0;
+ long rc;
+ struct folio *folio, *next;
+ LIST_HEAD(dst_list);
+ LIST_HEAD(ret_list);
- target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
-
- remove_hugetlb_folio(h, folio, false);
- spin_unlock_irq(&hugetlb_lock);
-
- /*
- * If vmemmap already existed for folio, the remove routine above would
- * have cleared the hugetlb folio flag. Hence the folio is technically
- * no longer a hugetlb folio. hugetlb_vmemmap_restore_folio can only be
- * passed hugetlb folios and will BUG otherwise.
- */
- if (folio_test_hugetlb(folio)) {
- rc = hugetlb_vmemmap_restore_folio(h, folio);
- if (rc) {
- /* Allocation of vmemmmap failed, we can not demote folio */
- spin_lock_irq(&hugetlb_lock);
- add_hugetlb_folio(h, folio, false);
- return rc;
- }
- }
-
- /*
- * Use destroy_compound_hugetlb_folio_for_demote for all huge page
- * sizes as it will not ref count folios.
- */
- destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h));
+ rc = hugetlb_vmemmap_restore_folios(src, src_list, &ret_list);
+ list_splice_init(&ret_list, src_list);
/*
* Taking target hstate mutex synchronizes with set_max_huge_pages.
* Without the mutex, pages added to target hstate could be marked
* as surplus.
*
- * Note that we already hold h->resize_lock. To prevent deadlock,
+ * Note that we already hold src->resize_lock. To prevent deadlock,
* use the convention of always taking larger size hstate mutex first.
*/
- mutex_lock(&target_hstate->resize_lock);
- for (i = 0; i < pages_per_huge_page(h);
- i += pages_per_huge_page(target_hstate)) {
- subpage = folio_page(folio, i);
- inner_folio = page_folio(subpage);
- if (hstate_is_gigantic(target_hstate))
- prep_compound_gigantic_folio_for_demote(inner_folio,
- target_hstate->order);
- else
- prep_compound_page(subpage, target_hstate->order);
- folio_change_private(inner_folio, NULL);
- prep_new_hugetlb_folio(target_hstate, inner_folio, nid);
- free_huge_folio(inner_folio);
+ mutex_lock(&dst->resize_lock);
+
+ list_for_each_entry_safe(folio, next, src_list, lru) {
+ int i;
+
+ if (folio_test_hugetlb_vmemmap_optimized(folio))
+ continue;
+
+ list_del(&folio->lru);
+ /*
+ * Use destroy_compound_hugetlb_folio_for_demote for all huge page
+ * sizes as it will not ref count folios.
+ */
+ destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(src));
+
+ for (i = 0; i < pages_per_huge_page(src); i += pages_per_huge_page(dst)) {
+ struct page *page = folio_page(folio, i);
+
+ if (hstate_is_gigantic(dst))
+ prep_compound_gigantic_folio_for_demote(page_folio(page),
+ dst->order);
+ else
+ prep_compound_page(page, dst->order);
+ set_page_private(page, 0);
+
+ init_new_hugetlb_folio(dst, page_folio(page));
+ list_add(&page->lru, &dst_list);
+ }
}
- mutex_unlock(&target_hstate->resize_lock);
- spin_lock_irq(&hugetlb_lock);
+ prep_and_add_allocated_folios(dst, &dst_list);
- /*
- * Not absolutely necessary, but for consistency update max_huge_pages
- * based on pool changes for the demoted page.
- */
- h->max_huge_pages--;
- target_hstate->max_huge_pages +=
- pages_per_huge_page(h) / pages_per_huge_page(target_hstate);
+ mutex_unlock(&dst->resize_lock);
return rc;
}
-static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+static long demote_pool_huge_page(struct hstate *src, nodemask_t *nodes_allowed,
+ unsigned long nr_to_demote)
__must_hold(&hugetlb_lock)
{
int nr_nodes, node;
- struct folio *folio;
+ struct hstate *dst;
+ long rc = 0;
+ long nr_demoted = 0;
lockdep_assert_held(&hugetlb_lock);
/* We should never get here if no demote order */
- if (!h->demote_order) {
+ if (!src->demote_order) {
pr_warn("HugeTLB: NULL demote order passed to demote_pool_huge_page.\n");
return -EINVAL; /* internal error */
}
+ dst = size_to_hstate(PAGE_SIZE << src->demote_order);
- for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
- list_for_each_entry(folio, &h->hugepage_freelists[node], lru) {
+ for_each_node_mask_to_free(src, nr_nodes, node, nodes_allowed) {
+ LIST_HEAD(list);
+ struct folio *folio, *next;
+
+ list_for_each_entry_safe(folio, next, &src->hugepage_freelists[node], lru) {
if (folio_test_hwpoison(folio))
continue;
- return demote_free_hugetlb_folio(h, folio);
+
+ remove_hugetlb_folio(src, folio, false);
+ list_add(&folio->lru, &list);
+
+ if (++nr_demoted == nr_to_demote)
+ break;
+ }
+
+ spin_unlock_irq(&hugetlb_lock);
+
+ rc = demote_free_hugetlb_folios(src, dst, &list);
+
+ spin_lock_irq(&hugetlb_lock);
+
+ list_for_each_entry_safe(folio, next, &list, lru) {
+ list_del(&folio->lru);
+ add_hugetlb_folio(src, folio, false);
+
+ nr_demoted--;
}
+
+ if (rc < 0 || nr_demoted == nr_to_demote)
+ break;
}
/*
+ * Not absolutely necessary, but for consistency update max_huge_pages
+ * based on pool changes for the demoted page.
+ */
+ src->max_huge_pages -= nr_demoted;
+ dst->max_huge_pages += nr_demoted << (huge_page_order(src) - huge_page_order(dst));
+
+ if (rc < 0)
+ return rc;
+
+ if (nr_demoted)
+ return nr_demoted;
+ /*
* Only way to get here is if all pages on free lists are poisoned.
* Return -EBUSY so that caller will not retry.
*/
@@ -4249,6 +4273,8 @@ static ssize_t demote_store(struct kobje
spin_lock_irq(&hugetlb_lock);
while (nr_demote) {
+ long rc;
+
/*
* Check for available pages to demote each time thorough the
* loop as demote_pool_huge_page will drop hugetlb_lock.
@@ -4261,11 +4287,13 @@ static ssize_t demote_store(struct kobje
if (!nr_available)
break;
- err = demote_pool_huge_page(h, n_mask);
- if (err)
+ rc = demote_pool_huge_page(h, n_mask, nr_demote);
+ if (rc < 0) {
+ err = rc;
break;
+ }
- nr_demote--;
+ nr_demote -= rc;
}
spin_unlock_irq(&hugetlb_lock);
_
Patches currently in -mm which might be from yuzhao(a)google.com are
mm-contig_alloc-support-__gfp_comp.patch
mm-cma-add-cma_allocfree_folio.patch
mm-cma-add-cma_allocfree_folio-fix.patch
mm-hugetlb-use-__gfp_comp-for-gigantic-folios.patch
mm-free-zapped-tail-pages-when-splitting-isolated-thp.patch
mm-remap-unused-subpages-to-shared-zeropage-when-splitting-isolated-thp.patch
The quilt patch titled
Subject: mm: only enforce minimum stack gap size if it's sensible
has been removed from the -mm tree. Its filename was
mm-only-enforce-minimum-stack-gap-size-if-its-sensible.patch
This patch was dropped because it was merged into the mm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: David Gow <davidgow(a)google.com>
Subject: mm: only enforce minimum stack gap size if it's sensible
Date: Sat, 3 Aug 2024 15:46:41 +0800
The generic mmap_base code tries to leave a gap between the top of the
stack and the mmap base address, but enforces a minimum gap size (MIN_GAP)
of 128MB, which is too large on some setups. In particular, on arm tasks
without ADDR_LIMIT_32BIT, the STACK_TOP value is less than 128MB, so it's
impossible to fit such a gap in.
Only enforce this minimum if MIN_GAP < MAX_GAP, as we'd prefer to honour
MAX_GAP, which is defined proportionally, so scales better and always
leaves us with both _some_ stack space and some room for mmap.
This fixes the usercopy KUnit test suite on 32-bit arm, as it doesn't set
any personality flags so gets the default (in this case 26-bit) task size.
This test can be run with: ./tools/testing/kunit/kunit.py run --arch arm
usercopy --make_options LLVM=1
Link: https://lkml.kernel.org/r/20240803074642.1849623-2-davidgow@google.com
Fixes: dba79c3df4a2 ("arm: use generic mmap top-down layout and brk randomization")
Signed-off-by: David Gow <davidgow(a)google.com>
Reviewed-by: Kees Cook <kees(a)kernel.org>
Cc: Alexandre Ghiti <alex(a)ghiti.fr>
Cc: Linus Walleij <linus.walleij(a)linaro.org>
Cc: Luis Chamberlain <mcgrof(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Russell King <linux(a)armlinux.org.uk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/util.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/util.c~mm-only-enforce-minimum-stack-gap-size-if-its-sensible
+++ a/mm/util.c
@@ -463,7 +463,7 @@ static unsigned long mmap_base(unsigned
if (gap + pad > gap)
gap += pad;
- if (gap < MIN_GAP)
+ if (gap < MIN_GAP && MIN_GAP < MAX_GAP)
gap = MIN_GAP;
else if (gap > MAX_GAP)
gap = MAX_GAP;
_
Patches currently in -mm which might be from davidgow(a)google.com are
The quilt patch titled
Subject: alloc_tag: fix allocation tag reporting when CONFIG_MODULES=n
has been removed from the -mm tree. Its filename was
alloc_tag-fix-allocation-tag-reporting-when-config_modules=n.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Suren Baghdasaryan <surenb(a)google.com>
Subject: alloc_tag: fix allocation tag reporting when CONFIG_MODULES=n
Date: Wed, 28 Aug 2024 16:15:36 -0700
codetag_module_init() is used to initialize sections containing allocation
tags. This function is used to initialize module sections as well as core
kernel sections, in which case the module parameter is set to NULL. This
function has to be called even when CONFIG_MODULES=n to initialize core
kernel allocation tag sections. When CONFIG_MODULES=n, this function is a
NOP, which is wrong. This leads to /proc/allocinfo reported as empty.
Fix this by making it independent of CONFIG_MODULES.
Link: https://lkml.kernel.org/r/20240828231536.1770519-1-surenb@google.com
Fixes: 916cc5167cc6 ("lib: code tagging framework")
Signed-off-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: Pasha Tatashin <pasha.tatashin(a)soleen.com>
Cc: Sourav Panda <souravpanda(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org> [6.10+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/codetag.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
--- a/lib/codetag.c~alloc_tag-fix-allocation-tag-reporting-when-config_modules=n
+++ a/lib/codetag.c
@@ -125,7 +125,6 @@ static inline size_t range_size(const st
cttype->desc.tag_size;
}
-#ifdef CONFIG_MODULES
static void *get_symbol(struct module *mod, const char *prefix, const char *name)
{
DECLARE_SEQ_BUF(sb, KSYM_NAME_LEN);
@@ -155,6 +154,15 @@ static struct codetag_range get_section_
};
}
+static const char *get_mod_name(__maybe_unused struct module *mod)
+{
+#ifdef CONFIG_MODULES
+ if (mod)
+ return mod->name;
+#endif
+ return "(built-in)";
+}
+
static int codetag_module_init(struct codetag_type *cttype, struct module *mod)
{
struct codetag_range range;
@@ -164,8 +172,7 @@ static int codetag_module_init(struct co
range = get_section_range(mod, cttype->desc.section);
if (!range.start || !range.stop) {
pr_warn("Failed to load code tags of type %s from the module %s\n",
- cttype->desc.section,
- mod ? mod->name : "(built-in)");
+ cttype->desc.section, get_mod_name(mod));
return -EINVAL;
}
@@ -199,6 +206,7 @@ static int codetag_module_init(struct co
return 0;
}
+#ifdef CONFIG_MODULES
void codetag_load_module(struct module *mod)
{
struct codetag_type *cttype;
@@ -248,9 +256,6 @@ bool codetag_unload_module(struct module
return unload_ok;
}
-
-#else /* CONFIG_MODULES */
-static int codetag_module_init(struct codetag_type *cttype, struct module *mod) { return 0; }
#endif /* CONFIG_MODULES */
struct codetag_type *
_
Patches currently in -mm which might be from surenb(a)google.com are
The quilt patch titled
Subject: mm: vmalloc: optimize vmap_lazy_nr arithmetic when purging each vmap_area
has been removed from the -mm tree. Its filename was
mm-vmalloc-optimize-vmap_lazy_nr-arithmetic-when-purging-each-vmap_area.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Adrian Huang <ahuang12(a)lenovo.com>
Subject: mm: vmalloc: optimize vmap_lazy_nr arithmetic when purging each vmap_area
Date: Thu, 29 Aug 2024 21:06:33 +0800
When running the vmalloc stress on a 448-core system, observe the average
latency of purge_vmap_node() is about 2 seconds by using the eBPF/bcc
'funclatency.py' tool [1].
# /your-git-repo/bcc/tools/funclatency.py -u purge_vmap_node & pid1=$! && sleep 8 && modprobe test_vmalloc nr_threads=$(nproc) run_test_mask=0x7; kill -SIGINT $pid1
usecs : count distribution
0 -> 1 : 0 | |
2 -> 3 : 29 | |
4 -> 7 : 19 | |
8 -> 15 : 56 | |
16 -> 31 : 483 |**** |
32 -> 63 : 1548 |************ |
64 -> 127 : 2634 |********************* |
128 -> 255 : 2535 |********************* |
256 -> 511 : 1776 |************** |
512 -> 1023 : 1015 |******** |
1024 -> 2047 : 573 |**** |
2048 -> 4095 : 488 |**** |
4096 -> 8191 : 1091 |********* |
8192 -> 16383 : 3078 |************************* |
16384 -> 32767 : 4821 |****************************************|
32768 -> 65535 : 3318 |*************************** |
65536 -> 131071 : 1718 |************** |
131072 -> 262143 : 2220 |****************** |
262144 -> 524287 : 1147 |********* |
524288 -> 1048575 : 1179 |********* |
1048576 -> 2097151 : 822 |****** |
2097152 -> 4194303 : 906 |******* |
4194304 -> 8388607 : 2148 |***************** |
8388608 -> 16777215 : 4497 |************************************* |
16777216 -> 33554431 : 289 |** |
avg = 2041714 usecs, total: 78381401772 usecs, count: 38390
The worst case is over 16-33 seconds, so soft lockup is triggered [2].
[Root Cause]
1) Each purge_list has the long list. The following shows the number of
vmap_area is purged.
crash> p vmap_nodes
vmap_nodes = $27 = (struct vmap_node *) 0xff2de5a900100000
crash> vmap_node 0xff2de5a900100000 128 | grep nr_purged
nr_purged = 663070
...
nr_purged = 821670
nr_purged = 692214
nr_purged = 726808
...
2) atomic_long_sub() employs the 'lock' prefix to ensure the atomic
operation when purging each vmap_area. However, the iteration is over
600000 vmap_area (See 'nr_purged' above).
Here is objdump output:
$ objdump -D vmlinux
ffffffff813e8c80 <purge_vmap_node>:
...
ffffffff813e8d70: f0 48 29 2d 68 0c bb lock sub %rbp,0x2bb0c68(%rip)
...
Quote from "Instruction tables" pdf file [3]:
Instructions with a LOCK prefix have a long latency that depends on
cache organization and possibly RAM speed. If there are multiple
processors or cores or direct memory access (DMA) devices, then all
locked instructions will lock a cache line for exclusive access,
which may involve RAM access. A LOCK prefix typically costs more
than a hundred clock cycles, even on single-processor systems.
That's why the latency of purge_vmap_node() dramatically increases
on a many-core system: One core is busy on purging each vmap_area of
the *long* purge_list and executing atomic_long_sub() for each
vmap_area, while other cores free vmalloc allocations and execute
atomic_long_add_return() in free_vmap_area_noflush().
[Solution]
Employ a local variable to record the total purged pages, and execute
atomic_long_sub() after the traversal of the purge_list is done. The
experiment result shows the latency improvement is 99%.
[Experiment Result]
1) System Configuration: Three servers (with HT-enabled) are tested.
* 72-core server: 3rd Gen Intel Xeon Scalable Processor*1
* 192-core server: 5th Gen Intel Xeon Scalable Processor*2
* 448-core server: AMD Zen 4 Processor*2
2) Kernel Config
* CONFIG_KASAN is disabled
3) The data in column "w/o patch" and "w/ patch"
* Unit: micro seconds (us)
* Each data is the average of 3-time measurements
System w/o patch (us) w/ patch (us) Improvement (%)
--------------- -------------- ------------- -------------
72-core server 2194 14 99.36%
192-core server 143799 1139 99.21%
448-core server 1992122 6883 99.65%
[1] https://github.com/iovisor/bcc/blob/master/tools/funclatency.py
[2] https://gist.github.com/AdrianHuang/37c15f67b45407b83c2d32f918656c12
[3] https://www.agner.org/optimize/instruction_tables.pdf
Link: https://lkml.kernel.org/r/20240829130633.2184-1-ahuang12@lenovo.com
Signed-off-by: Adrian Huang <ahuang12(a)lenovo.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki(a)gmail.com>
Cc: Christoph Hellwig <hch(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmalloc.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/mm/vmalloc.c~mm-vmalloc-optimize-vmap_lazy_nr-arithmetic-when-purging-each-vmap_area
+++ a/mm/vmalloc.c
@@ -2191,6 +2191,7 @@ static void purge_vmap_node(struct work_
{
struct vmap_node *vn = container_of(work,
struct vmap_node, purge_work);
+ unsigned long nr_purged_pages = 0;
struct vmap_area *va, *n_va;
LIST_HEAD(local_list);
@@ -2208,7 +2209,7 @@ static void purge_vmap_node(struct work_
kasan_release_vmalloc(orig_start, orig_end,
va->va_start, va->va_end);
- atomic_long_sub(nr, &vmap_lazy_nr);
+ nr_purged_pages += nr;
vn->nr_purged++;
if (is_vn_id_valid(vn_id) && !vn->skip_populate)
@@ -2219,6 +2220,8 @@ static void purge_vmap_node(struct work_
list_add(&va->list, &local_list);
}
+ atomic_long_sub(nr_purged_pages, &vmap_lazy_nr);
+
reclaim_list_global(&local_list);
}
_
Patches currently in -mm which might be from ahuang12(a)lenovo.com are
mm-vmalloc-combine-all-tlb-flush-operations-of-kasan-shadow-virtual-address-into-one-operation.patch
The quilt patch titled
Subject: Revert "mm: skip CMA pages when they are not available"
has been removed from the -mm tree. Its filename was
revert-mm-skip-cma-pages-when-they-are-not-available.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Usama Arif <usamaarif642(a)gmail.com>
Subject: Revert "mm: skip CMA pages when they are not available"
Date: Wed, 21 Aug 2024 20:26:07 +0100
This reverts commit 5da226dbfce3 ("mm: skip CMA pages when they are not
available") and b7108d66318a ("Multi-gen LRU: skip CMA pages when they are
not eligible").
lruvec->lru_lock is highly contended and is held when calling
isolate_lru_folios. If the lru has a large number of CMA folios
consecutively, while the allocation type requested is not MIGRATE_MOVABLE,
isolate_lru_folios can hold the lock for a very long time while it skips
those. For FIO workload, ~150million order=0 folios were skipped to
isolate a few ZONE_DMA folios [1]. This can cause lockups [1] and high
memory pressure for extended periods of time [2].
Remove skipping CMA for MGLRU as well, as it was introduced in sort_folio
for the same resaon as 5da226dbfce3a2f44978c2c7cf88166e69a6788b.
[1] https://lore.kernel.org/all/CAOUHufbkhMZYz20aM_3rHZ3OcK4m2puji2FGpUpn_-DevG…
[2] https://lore.kernel.org/all/ZrssOrcJIDy8hacI@gmail.com/
[usamaarif642(a)gmail.com: also revert b7108d66318a, per Johannes]
Link: https://lkml.kernel.org/r/9060a32d-b2d7-48c0-8626-1db535653c54@gmail.com
Link: https://lkml.kernel.org/r/357ac325-4c61-497a-92a3-bdbd230d5ec9@gmail.com
Link: https://lkml.kernel.org/r/9060a32d-b2d7-48c0-8626-1db535653c54@gmail.com
Fixes: 5da226dbfce3 ("mm: skip CMA pages when they are not available")
Signed-off-by: Usama Arif <usamaarif642(a)gmail.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Bharata B Rao <bharata(a)amd.com>
Cc: Breno Leitao <leitao(a)debian.org>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Yu Zhao <yuzhao(a)google.com>
Cc: Zhaoyang Huang <huangzhaoyang(a)gmail.com>
Cc: Zhaoyang Huang <zhaoyang.huang(a)unisoc.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/vmscan.c | 24 ++----------------------
1 file changed, 2 insertions(+), 22 deletions(-)
--- a/mm/vmscan.c~revert-mm-skip-cma-pages-when-they-are-not-available
+++ a/mm/vmscan.c
@@ -1604,25 +1604,6 @@ static __always_inline void update_lru_s
}
-#ifdef CONFIG_CMA
-/*
- * It is waste of effort to scan and reclaim CMA pages if it is not available
- * for current allocation context. Kswapd can not be enrolled as it can not
- * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL
- */
-static bool skip_cma(struct folio *folio, struct scan_control *sc)
-{
- return !current_is_kswapd() &&
- gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
- folio_migratetype(folio) == MIGRATE_CMA;
-}
-#else
-static bool skip_cma(struct folio *folio, struct scan_control *sc)
-{
- return false;
-}
-#endif
-
/*
* Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
*
@@ -1669,8 +1650,7 @@ static unsigned long isolate_lru_folios(
nr_pages = folio_nr_pages(folio);
total_scan += nr_pages;
- if (folio_zonenum(folio) > sc->reclaim_idx ||
- skip_cma(folio, sc)) {
+ if (folio_zonenum(folio) > sc->reclaim_idx) {
nr_skipped[folio_zonenum(folio)] += nr_pages;
move_to = &folios_skipped;
goto move;
@@ -4320,7 +4300,7 @@ static bool sort_folio(struct lruvec *lr
}
/* ineligible */
- if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
+ if (zone > sc->reclaim_idx) {
gen = folio_inc_gen(lruvec, folio, false);
list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
return true;
_
Patches currently in -mm which might be from usamaarif642(a)gmail.com are
mm-store-zero-pages-to-be-swapped-out-in-a-bitmap.patch
mm-remove-code-to-handle-same-filled-pages.patch
mm-introduce-a-pageflag-for-partially-mapped-folios.patch
mm-split-underused-thps.patch
mm-add-sysfs-entry-to-disable-splitting-underused-thps.patch
The quilt patch titled
Subject: maple_tree: remove rcu_read_lock() from mt_validate()
has been removed from the -mm tree. Its filename was
maple_tree-remove-rcu_read_lock-from-mt_validate.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Liam R. Howlett" <Liam.Howlett(a)Oracle.com>
Subject: maple_tree: remove rcu_read_lock() from mt_validate()
Date: Tue, 20 Aug 2024 13:54:17 -0400
The write lock should be held when validating the tree to avoid updates
racing with checks. Holding the rcu read lock during a large tree
validation may also cause a prolonged rcu read window and "rcu_preempt
detected stalls" warnings.
Link: https://lore.kernel.org/all/0000000000001d12d4062005aea1@google.com/
Link: https://lkml.kernel.org/r/20240820175417.2782532-1-Liam.Howlett@oracle.com
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
Reported-by: syzbot+036af2f0c7338a33b0cd(a)syzkaller.appspotmail.com
Cc: Hillf Danton <hdanton(a)sina.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: "Paul E. McKenney" <paulmck(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/maple_tree.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
--- a/lib/maple_tree.c~maple_tree-remove-rcu_read_lock-from-mt_validate
+++ a/lib/maple_tree.c
@@ -7566,14 +7566,14 @@ static void mt_validate_nulls(struct map
* 2. The gap is correctly set in the parents
*/
void mt_validate(struct maple_tree *mt)
+ __must_hold(mas->tree->ma_lock)
{
unsigned char end;
MA_STATE(mas, mt, 0, 0);
- rcu_read_lock();
mas_start(&mas);
if (!mas_is_active(&mas))
- goto done;
+ return;
while (!mte_is_leaf(mas.node))
mas_descend(&mas);
@@ -7594,9 +7594,6 @@ void mt_validate(struct maple_tree *mt)
mas_dfs_postorder(&mas, ULONG_MAX);
}
mt_validate_nulls(mt);
-done:
- rcu_read_unlock();
-
}
EXPORT_SYMBOL_GPL(mt_validate);
_
Patches currently in -mm which might be from Liam.Howlett(a)Oracle.com are
mm-vma-correctly-position-vma_iterator-in-__split_vma.patch
mm-vma-introduce-abort_munmap_vmas.patch
mm-vma-introduce-vmi_complete_munmap_vmas.patch
mm-vma-extract-the-gathering-of-vmas-from-do_vmi_align_munmap.patch
mm-vma-introduce-vma_munmap_struct-for-use-in-munmap-operations.patch
mm-vma-change-munmap-to-use-vma_munmap_struct-for-accounting-and-surrounding-vmas.patch
mm-vma-extract-validate_mm-from-vma_complete.patch
mm-vma-inline-munmap-operation-in-mmap_region.patch
mm-vma-expand-mmap_region-munmap-call.patch
mm-vma-support-vma-==-null-in-init_vma_munmap.patch
mm-mmap-reposition-vma-iterator-in-mmap_region.patch
mm-vma-track-start-and-end-for-munmap-in-vma_munmap_struct.patch
mm-clean-up-unmap_region-argument-list.patch
mm-mmap-avoid-zeroing-vma-tree-in-mmap_region.patch
mm-change-failure-of-map_fixed-to-restoring-the-gap-on-failure.patch
mm-mmap-use-phys_pfn-in-mmap_region.patch
mm-mmap-use-vms-accounted-pages-in-mmap_region.patch
ipc-shm-mm-drop-do_vma_munmap.patch
mm-move-may_expand_vm-check-in-mmap_region.patch
mm-vma-drop-incorrect-comment-from-vms_gather_munmap_vmas.patch
mm-vmah-optimise-vma_munmap_struct.patch
The quilt patch titled
Subject: kexec_file: fix elfcorehdr digest exclusion when CONFIG_CRASH_HOTPLUG=y
has been removed from the -mm tree. Its filename was
kexec_file-fix-elfcorehdr-digest-exclusion-when-config_crash_hotplug=y.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Petr Tesarik <ptesarik(a)suse.com>
Subject: kexec_file: fix elfcorehdr digest exclusion when CONFIG_CRASH_HOTPLUG=y
Date: Mon, 5 Aug 2024 17:07:50 +0200
Fix the condition to exclude the elfcorehdr segment from the SHA digest
calculation.
The j iterator is an index into the output sha_regions[] array, not into
the input image->segment[] array. Once it reaches
image->elfcorehdr_index, all subsequent segments are excluded. Besides,
if the purgatory segment precedes the elfcorehdr segment, the elfcorehdr
may be wrongly included in the calculation.
Link: https://lkml.kernel.org/r/20240805150750.170739-1-petr.tesarik@suse.com
Fixes: f7cc804a9fd4 ("kexec: exclude elfcorehdr from the segment digest")
Signed-off-by: Petr Tesarik <ptesarik(a)suse.com>
Acked-by: Baoquan He <bhe(a)redhat.com>
Cc: Eric Biederman <ebiederm(a)xmission.com>
Cc: Hari Bathini <hbathini(a)linux.ibm.com>
Cc: Sourabh Jain <sourabhjain(a)linux.ibm.com>
Cc: Eric DeVolder <eric_devolder(a)yahoo.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kexec_file.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/kernel/kexec_file.c~kexec_file-fix-elfcorehdr-digest-exclusion-when-config_crash_hotplug=y
+++ a/kernel/kexec_file.c
@@ -752,7 +752,7 @@ static int kexec_calculate_store_digests
#ifdef CONFIG_CRASH_HOTPLUG
/* Exclude elfcorehdr segment to allow future changes via hotplug */
- if (j == image->elfcorehdr_index)
+ if (i == image->elfcorehdr_index)
continue;
#endif
_
Patches currently in -mm which might be from ptesarik(a)suse.com are
The quilt patch titled
Subject: mm/slub: add check for s->flags in the alloc_tagging_slab_free_hook
has been removed from the -mm tree. Its filename was
mm-slub-add-check-for-s-flags-in-the-alloc_tagging_slab_free_hook.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Hao Ge <gehao(a)kylinos.cn>
Subject: mm/slub: add check for s->flags in the alloc_tagging_slab_free_hook
Date: Fri, 16 Aug 2024 09:33:36 +0800
When enable CONFIG_MEMCG & CONFIG_KFENCE & CONFIG_KMEMLEAK, the following
warning always occurs,This is because the following call stack occurred:
mem_pool_alloc
kmem_cache_alloc_noprof
slab_alloc_node
kfence_alloc
Once the kfence allocation is successful,slab->obj_exts will not be empty,
because it has already been assigned a value in kfence_init_pool.
Since in the prepare_slab_obj_exts_hook function,we perform a check for
s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE),the alloc_tag_add function
will not be called as a result.Therefore,ref->ct remains NULL.
However,when we call mem_pool_free,since obj_ext is not empty, it
eventually leads to the alloc_tag_sub scenario being invoked. This is
where the warning occurs.
So we should add corresponding checks in the alloc_tagging_slab_free_hook.
For __GFP_NO_OBJ_EXT case,I didn't see the specific case where it's using
kfence,so I won't add the corresponding check in
alloc_tagging_slab_free_hook for now.
[ 3.734349] ------------[ cut here ]------------
[ 3.734807] alloc_tag was not set
[ 3.735129] WARNING: CPU: 4 PID: 40 at ./include/linux/alloc_tag.h:130 kmem_cache_free+0x444/0x574
[ 3.735866] Modules linked in: autofs4
[ 3.736211] CPU: 4 UID: 0 PID: 40 Comm: ksoftirqd/4 Tainted: G W 6.11.0-rc3-dirty #1
[ 3.736969] Tainted: [W]=WARN
[ 3.737258] Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022
[ 3.737875] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 3.738501] pc : kmem_cache_free+0x444/0x574
[ 3.738951] lr : kmem_cache_free+0x444/0x574
[ 3.739361] sp : ffff80008357bb60
[ 3.739693] x29: ffff80008357bb70 x28: 0000000000000000 x27: 0000000000000000
[ 3.740338] x26: ffff80008207f000 x25: ffff000b2eb2fd60 x24: ffff0000c0005700
[ 3.740982] x23: ffff8000804229e4 x22: ffff800082080000 x21: ffff800081756000
[ 3.741630] x20: fffffd7ff8253360 x19: 00000000000000a8 x18: ffffffffffffffff
[ 3.742274] x17: ffff800ab327f000 x16: ffff800083398000 x15: ffff800081756df0
[ 3.742919] x14: 0000000000000000 x13: 205d344320202020 x12: 5b5d373038343337
[ 3.743560] x11: ffff80008357b650 x10: 000000000000005d x9 : 00000000ffffffd0
[ 3.744231] x8 : 7f7f7f7f7f7f7f7f x7 : ffff80008237bad0 x6 : c0000000ffff7fff
[ 3.744907] x5 : ffff80008237ba78 x4 : ffff8000820bbad0 x3 : 0000000000000001
[ 3.745580] x2 : 68d66547c09f7800 x1 : 68d66547c09f7800 x0 : 0000000000000000
[ 3.746255] Call trace:
[ 3.746530] kmem_cache_free+0x444/0x574
[ 3.746931] mem_pool_free+0x44/0xf4
[ 3.747306] free_object_rcu+0xc8/0xdc
[ 3.747693] rcu_do_batch+0x234/0x8a4
[ 3.748075] rcu_core+0x230/0x3e4
[ 3.748424] rcu_core_si+0x14/0x1c
[ 3.748780] handle_softirqs+0x134/0x378
[ 3.749189] run_ksoftirqd+0x70/0x9c
[ 3.749560] smpboot_thread_fn+0x148/0x22c
[ 3.749978] kthread+0x10c/0x118
[ 3.750323] ret_from_fork+0x10/0x20
[ 3.750696] ---[ end trace 0000000000000000 ]---
Link: https://lkml.kernel.org/r/20240816013336.17505-1-hao.ge@linux.dev
Fixes: 4b8736964640 ("mm/slab: add allocation accounting into slab allocation and free paths")
Signed-off-by: Hao Ge <gehao(a)kylinos.cn>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Hyeonggon Yoo <42.hyeyoo(a)gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: Kees Cook <kees(a)kernel.org>
Cc: Kent Overstreet <kent.overstreet(a)linux.dev>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: Roman Gushchin <roman.gushchin(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slub.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/mm/slub.c~mm-slub-add-check-for-s-flags-in-the-alloc_tagging_slab_free_hook
+++ a/mm/slub.c
@@ -2116,6 +2116,10 @@ alloc_tagging_slab_free_hook(struct kmem
if (!mem_alloc_profiling_enabled())
return;
+ /* slab->obj_exts might not be NULL if it was created for MEMCG accounting. */
+ if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE))
+ return;
+
obj_exts = slab_obj_exts(slab);
if (!obj_exts)
return;
_
Patches currently in -mm which might be from gehao(a)kylinos.cn are
mm-cma-change-the-addition-of-totalcma_pages-in-the-cma_init_reserved_mem.patch
The quilt patch titled
Subject: nilfs2: fix state management in error path of log writing function
has been removed from the -mm tree. Its filename was
nilfs2-fix-state-management-in-error-path-of-log-writing-function.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix state management in error path of log writing function
Date: Wed, 14 Aug 2024 19:11:19 +0900
After commit a694291a6211 ("nilfs2: separate wait function from
nilfs_segctor_write") was applied, the log writing function
nilfs_segctor_do_construct() was able to issue I/O requests continuously
even if user data blocks were split into multiple logs across segments,
but two potential flaws were introduced in its error handling.
First, if nilfs_segctor_begin_construction() fails while creating the
second or subsequent logs, the log writing function returns without
calling nilfs_segctor_abort_construction(), so the writeback flag set on
pages/folios will remain uncleared. This causes page cache operations to
hang waiting for the writeback flag. For example,
truncate_inode_pages_final(), which is called via nilfs_evict_inode() when
an inode is evicted from memory, will hang.
Second, the NILFS_I_COLLECTED flag set on normal inodes remain uncleared.
As a result, if the next log write involves checkpoint creation, that's
fine, but if a partial log write is performed that does not, inodes with
NILFS_I_COLLECTED set are erroneously removed from the "sc_dirty_files"
list, and their data and b-tree blocks may not be written to the device,
corrupting the block mapping.
Fix these issues by uniformly calling nilfs_segctor_abort_construction()
on failure of each step in the loop in nilfs_segctor_do_construct(),
having it clean up logs and segment usages according to progress, and
correcting the conditions for calling nilfs_redirty_inodes() to ensure
that the NILFS_I_COLLECTED flag is cleared.
Link: https://lkml.kernel.org/r/20240814101119.4070-1-konishi.ryusuke@gmail.com
Fixes: a694291a6211 ("nilfs2: separate wait function from nilfs_segctor_write")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/segment.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
--- a/fs/nilfs2/segment.c~nilfs2-fix-state-management-in-error-path-of-log-writing-function
+++ a/fs/nilfs2/segment.c
@@ -1812,6 +1812,9 @@ static void nilfs_segctor_abort_construc
nilfs_abort_logs(&logs, ret ? : err);
list_splice_tail_init(&sci->sc_segbufs, &logs);
+ if (list_empty(&logs))
+ return; /* if the first segment buffer preparation failed */
+
nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
nilfs_free_incomplete_logs(&logs, nilfs);
@@ -2056,7 +2059,7 @@ static int nilfs_segctor_do_construct(st
err = nilfs_segctor_begin_construction(sci, nilfs);
if (unlikely(err))
- goto out;
+ goto failed;
/* Update time stamp */
sci->sc_seg_ctime = ktime_get_real_seconds();
@@ -2120,10 +2123,9 @@ static int nilfs_segctor_do_construct(st
return err;
failed_to_write:
- if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
- nilfs_redirty_inodes(&sci->sc_dirty_files);
-
failed:
+ if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE)
+ nilfs_redirty_inodes(&sci->sc_dirty_files);
if (nilfs_doing_gc())
nilfs_redirty_inodes(&sci->sc_gc_inodes);
nilfs_segctor_abort_construction(sci, nilfs, err);
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-add-support-for-fs_ioc_getuuid.patch
nilfs2-add-support-for-fs_ioc_getfssysfspath.patch
nilfs2-add-support-for-fs_ioc_getfslabel.patch
nilfs2-add-support-for-fs_ioc_setfslabel.patch
nilfs2-do-not-output-warnings-when-clearing-dirty-buffers.patch
nilfs2-add-missing-argument-description-for-__nilfs_error.patch
nilfs2-add-missing-argument-descriptions-for-ioctl-related-helpers.patch
nilfs2-improve-kernel-doc-comments-for-b-tree-node-helpers.patch
nilfs2-fix-incorrect-kernel-doc-declaration-of-nilfs_palloc_req-structure.patch
nilfs2-add-missing-description-of-nilfs_btree_path-structure.patch
nilfs2-describe-the-members-of-nilfs_bmap_operations-structure.patch
nilfs2-fix-inconsistencies-in-kernel-doc-comments-in-segmenth.patch
nilfs2-fix-missing-initial-short-descriptions-of-kernel-doc-comments.patch
nilfs2-treat-missing-sufile-header-block-as-metadata-corruption.patch
nilfs2-treat-missing-cpfile-header-block-as-metadata-corruption.patch
nilfs2-do-not-propagate-enoent-error-from-sufile-during-recovery.patch
nilfs2-do-not-propagate-enoent-error-from-sufile-during-gc.patch
nilfs2-do-not-propagate-enoent-error-from-nilfs_sufile_mark_dirty.patch
nilfs2-use-the-bits_per_long-macro.patch
nilfs2-separate-inode-type-information-from-i_state-field.patch
nilfs2-eliminate-the-shared-counter-and-spinlock-for-i_generation.patch
nilfs2-do-not-repair-reserved-inode-bitmap-in-nilfs_new_inode.patch
nilfs2-remove-sc_timer_task.patch
nilfs2-use-kthread_create-and-kthread_stop-for-the-log-writer-thread.patch
nilfs2-refactor-nilfs_segctor_thread.patch
The quilt patch titled
Subject: nilfs2: fix missing cleanup on rollforward recovery error
has been removed from the -mm tree. Its filename was
nilfs2-fix-missing-cleanup-on-rollforward-recovery-error.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix missing cleanup on rollforward recovery error
Date: Sat, 10 Aug 2024 15:52:42 +0900
In an error injection test of a routine for mount-time recovery, KASAN
found a use-after-free bug.
It turned out that if data recovery was performed using partial logs
created by dsync writes, but an error occurred before starting the log
writer to create a recovered checkpoint, the inodes whose data had been
recovered were left in the ns_dirty_files list of the nilfs object and
were not freed.
Fix this issue by cleaning up inodes that have read the recovery data if
the recovery routine fails midway before the log writer starts.
Link: https://lkml.kernel.org/r/20240810065242.3701-1-konishi.ryusuke@gmail.com
Fixes: 0f3e1c7f23f8 ("nilfs2: recovery functions")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/recovery.c | 35 +++++++++++++++++++++++++++++++++--
1 file changed, 33 insertions(+), 2 deletions(-)
--- a/fs/nilfs2/recovery.c~nilfs2-fix-missing-cleanup-on-rollforward-recovery-error
+++ a/fs/nilfs2/recovery.c
@@ -716,6 +716,33 @@ static void nilfs_finish_roll_forward(st
}
/**
+ * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery
+ * @nilfs: nilfs object
+ */
+static void nilfs_abort_roll_forward(struct the_nilfs *nilfs)
+{
+ struct nilfs_inode_info *ii, *n;
+ LIST_HEAD(head);
+
+ /* Abandon inodes that have read recovery data */
+ spin_lock(&nilfs->ns_inode_lock);
+ list_splice_init(&nilfs->ns_dirty_files, &head);
+ spin_unlock(&nilfs->ns_inode_lock);
+ if (list_empty(&head))
+ return;
+
+ set_nilfs_purging(nilfs);
+ list_for_each_entry_safe(ii, n, &head, i_dirty) {
+ spin_lock(&nilfs->ns_inode_lock);
+ list_del_init(&ii->i_dirty);
+ spin_unlock(&nilfs->ns_inode_lock);
+
+ iput(&ii->vfs_inode);
+ }
+ clear_nilfs_purging(nilfs);
+}
+
+/**
* nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
* @nilfs: nilfs object
* @sb: super block instance
@@ -773,15 +800,19 @@ int nilfs_salvage_orphan_logs(struct the
if (unlikely(err)) {
nilfs_err(sb, "error %d writing segment for recovery",
err);
- goto failed;
+ goto put_root;
}
nilfs_finish_roll_forward(nilfs, ri);
}
- failed:
+put_root:
nilfs_put_root(root);
return err;
+
+failed:
+ nilfs_abort_roll_forward(nilfs);
+ goto put_root;
}
/**
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-add-support-for-fs_ioc_getuuid.patch
nilfs2-add-support-for-fs_ioc_getfssysfspath.patch
nilfs2-add-support-for-fs_ioc_getfslabel.patch
nilfs2-add-support-for-fs_ioc_setfslabel.patch
nilfs2-do-not-output-warnings-when-clearing-dirty-buffers.patch
nilfs2-add-missing-argument-description-for-__nilfs_error.patch
nilfs2-add-missing-argument-descriptions-for-ioctl-related-helpers.patch
nilfs2-improve-kernel-doc-comments-for-b-tree-node-helpers.patch
nilfs2-fix-incorrect-kernel-doc-declaration-of-nilfs_palloc_req-structure.patch
nilfs2-add-missing-description-of-nilfs_btree_path-structure.patch
nilfs2-describe-the-members-of-nilfs_bmap_operations-structure.patch
nilfs2-fix-inconsistencies-in-kernel-doc-comments-in-segmenth.patch
nilfs2-fix-missing-initial-short-descriptions-of-kernel-doc-comments.patch
nilfs2-treat-missing-sufile-header-block-as-metadata-corruption.patch
nilfs2-treat-missing-cpfile-header-block-as-metadata-corruption.patch
nilfs2-do-not-propagate-enoent-error-from-sufile-during-recovery.patch
nilfs2-do-not-propagate-enoent-error-from-sufile-during-gc.patch
nilfs2-do-not-propagate-enoent-error-from-nilfs_sufile_mark_dirty.patch
nilfs2-use-the-bits_per_long-macro.patch
nilfs2-separate-inode-type-information-from-i_state-field.patch
nilfs2-eliminate-the-shared-counter-and-spinlock-for-i_generation.patch
nilfs2-do-not-repair-reserved-inode-bitmap-in-nilfs_new_inode.patch
nilfs2-remove-sc_timer_task.patch
nilfs2-use-kthread_create-and-kthread_stop-for-the-log-writer-thread.patch
nilfs2-refactor-nilfs_segctor_thread.patch
The quilt patch titled
Subject: userfaultfd: don't BUG_ON() if khugepaged yanks our page table
has been removed from the -mm tree. Its filename was
userfaultfd-dont-bug_on-if-khugepaged-yanks-our-page-table.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Jann Horn <jannh(a)google.com>
Subject: userfaultfd: don't BUG_ON() if khugepaged yanks our page table
Date: Tue, 13 Aug 2024 22:25:22 +0200
Since khugepaged was changed to allow retracting page tables in file
mappings without holding the mmap lock, these BUG_ON()s are wrong - get
rid of them.
We could also remove the preceding "if (unlikely(...))" block, but then we
could reach pte_offset_map_lock() with transhuge pages not just for file
mappings but also for anonymous mappings - which would probably be fine
but I think is not necessarily expected.
Link: https://lkml.kernel.org/r/20240813-uffd-thp-flip-fix-v2-2-5efa61078a41@goog…
Fixes: 1d65b771bc08 ("mm/khugepaged: retract_page_tables() without mmap or vma lock")
Signed-off-by: Jann Horn <jannh(a)google.com>
Reviewed-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Pavel Emelyanov <xemul(a)virtuozzo.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/userfaultfd.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
--- a/mm/userfaultfd.c~userfaultfd-dont-bug_on-if-khugepaged-yanks-our-page-table
+++ a/mm/userfaultfd.c
@@ -807,9 +807,10 @@ retry:
err = -EFAULT;
break;
}
-
- BUG_ON(pmd_none(*dst_pmd));
- BUG_ON(pmd_trans_huge(*dst_pmd));
+ /*
+ * For shmem mappings, khugepaged is allowed to remove page
+ * tables under us; pte_offset_map_lock() will deal with that.
+ */
err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr,
src_addr, flags, &folio);
_
Patches currently in -mm which might be from jannh(a)google.com are
mm-fix-harmless-type-confusion-in-lock_vma_under_rcu.patch
The quilt patch titled
Subject: userfaultfd: fix checks for huge PMDs
has been removed from the -mm tree. Its filename was
userfaultfd-fix-checks-for-huge-pmds.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Jann Horn <jannh(a)google.com>
Subject: userfaultfd: fix checks for huge PMDs
Date: Tue, 13 Aug 2024 22:25:21 +0200
Patch series "userfaultfd: fix races around pmd_trans_huge() check", v2.
The pmd_trans_huge() code in mfill_atomic() is wrong in three different
ways depending on kernel version:
1. The pmd_trans_huge() check is racy and can lead to a BUG_ON() (if you hit
the right two race windows) - I've tested this in a kernel build with
some extra mdelay() calls. See the commit message for a description
of the race scenario.
On older kernels (before 6.5), I think the same bug can even
theoretically lead to accessing transhuge page contents as a page table
if you hit the right 5 narrow race windows (I haven't tested this case).
2. As pointed out by Qi Zheng, pmd_trans_huge() is not sufficient for
detecting PMDs that don't point to page tables.
On older kernels (before 6.5), you'd just have to win a single fairly
wide race to hit this.
I've tested this on 6.1 stable by racing migration (with a mdelay()
patched into try_to_migrate()) against UFFDIO_ZEROPAGE - on my x86
VM, that causes a kernel oops in ptlock_ptr().
3. On newer kernels (>=6.5), for shmem mappings, khugepaged is allowed
to yank page tables out from under us (though I haven't tested that),
so I think the BUG_ON() checks in mfill_atomic() are just wrong.
I decided to write two separate fixes for these (one fix for bugs 1+2, one
fix for bug 3), so that the first fix can be backported to kernels
affected by bugs 1+2.
This patch (of 2):
This fixes two issues.
I discovered that the following race can occur:
mfill_atomic other thread
============ ============
<zap PMD>
pmdp_get_lockless() [reads none pmd]
<bail if trans_huge>
<if none:>
<pagefault creates transhuge zeropage>
__pte_alloc [no-op]
<zap PMD>
<bail if pmd_trans_huge(*dst_pmd)>
BUG_ON(pmd_none(*dst_pmd))
I have experimentally verified this in a kernel with extra mdelay() calls;
the BUG_ON(pmd_none(*dst_pmd)) triggers.
On kernels newer than commit 0d940a9b270b ("mm/pgtable: allow
pte_offset_map[_lock]() to fail"), this can't lead to anything worse than
a BUG_ON(), since the page table access helpers are actually designed to
deal with page tables concurrently disappearing; but on older kernels
(<=6.4), I think we could probably theoretically race past the two
BUG_ON() checks and end up treating a hugepage as a page table.
The second issue is that, as Qi Zheng pointed out, there are other types
of huge PMDs that pmd_trans_huge() can't catch: devmap PMDs and swap PMDs
(in particular, migration PMDs).
On <=6.4, this is worse than the first issue: If mfill_atomic() runs on a
PMD that contains a migration entry (which just requires winning a single,
fairly wide race), it will pass the PMD to pte_offset_map_lock(), which
assumes that the PMD points to a page table.
Breakage follows: First, the kernel tries to take the PTE lock (which will
crash or maybe worse if there is no "struct page" for the address bits in
the migration entry PMD - I think at least on X86 there usually is no
corresponding "struct page" thanks to the PTE inversion mitigation, amd64
looks different).
If that didn't crash, the kernel would next try to write a PTE into what
it wrongly thinks is a page table.
As part of fixing these issues, get rid of the check for pmd_trans_huge()
before __pte_alloc() - that's redundant, we're going to have to check for
that after the __pte_alloc() anyway.
Backport note: pmdp_get_lockless() is pmd_read_atomic() in older kernels.
Link: https://lkml.kernel.org/r/20240813-uffd-thp-flip-fix-v2-0-5efa61078a41@goog…
Link: https://lkml.kernel.org/r/20240813-uffd-thp-flip-fix-v2-1-5efa61078a41@goog…
Fixes: c1a4de99fada ("userfaultfd: mcopy_atomic|mfill_zeropage: UFFDIO_COPY|UFFDIO_ZEROPAGE preparation")
Signed-off-by: Jann Horn <jannh(a)google.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Pavel Emelyanov <xemul(a)virtuozzo.com>
Cc: Qi Zheng <zhengqi.arch(a)bytedance.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/userfaultfd.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
--- a/mm/userfaultfd.c~userfaultfd-fix-checks-for-huge-pmds
+++ a/mm/userfaultfd.c
@@ -787,21 +787,23 @@ retry:
}
dst_pmdval = pmdp_get_lockless(dst_pmd);
- /*
- * If the dst_pmd is mapped as THP don't
- * override it and just be strict.
- */
- if (unlikely(pmd_trans_huge(dst_pmdval))) {
- err = -EEXIST;
- break;
- }
if (unlikely(pmd_none(dst_pmdval)) &&
unlikely(__pte_alloc(dst_mm, dst_pmd))) {
err = -ENOMEM;
break;
}
- /* If an huge pmd materialized from under us fail */
- if (unlikely(pmd_trans_huge(*dst_pmd))) {
+ dst_pmdval = pmdp_get_lockless(dst_pmd);
+ /*
+ * If the dst_pmd is THP don't override it and just be strict.
+ * (This includes the case where the PMD used to be THP and
+ * changed back to none after __pte_alloc().)
+ */
+ if (unlikely(!pmd_present(dst_pmdval) || pmd_trans_huge(dst_pmdval) ||
+ pmd_devmap(dst_pmdval))) {
+ err = -EEXIST;
+ break;
+ }
+ if (unlikely(pmd_bad(dst_pmdval))) {
err = -EFAULT;
break;
}
_
Patches currently in -mm which might be from jannh(a)google.com are
mm-fix-harmless-type-confusion-in-lock_vma_under_rcu.patch