The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x f31fe8165d365379d858c53bef43254c7d6d1cfd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051208-hardly-dole-1422@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f31fe8165d365379d858c53bef43254c7d6d1cfd Mon Sep 17 00:00:00 2001
From: Naman Jain <namjain(a)linux.microsoft.com>
Date: Fri, 2 May 2025 13:18:10 +0530
Subject: [PATCH] uio_hv_generic: Fix sysfs creation path for ring buffer
On regular bootup, devices get registered to VMBus first, so when
uio_hv_generic driver for a particular device type is probed,
the device is already initialized and added, so sysfs creation in
hv_uio_probe() works fine. However, when the device is removed
and brought back, the channel gets rescinded and the device again gets
registered to VMBus. However this time, the uio_hv_generic driver is
already registered to probe for that device and in this case sysfs
creation is tried before the device's kobject gets initialized
completely.
Fix this by moving the core logic of sysfs creation of ring buffer,
from uio_hv_generic to HyperV's VMBus driver, where the rest of the
sysfs attributes for the channels are defined. While doing that, make
use of attribute groups and macros, instead of creating sysfs
directly, to ensure better error handling and code flow.
Problematic path:
vmbus_process_offer (A new offer comes for the VMBus device)
vmbus_add_channel_work
vmbus_device_register
|-> device_register
| |...
| |-> hv_uio_probe
| |...
| |-> sysfs_create_bin_file (leads to a warning as
| the primary channel's kobject, which is used to
| create the sysfs file, is not yet initialized)
|-> kset_create_and_add
|-> vmbus_add_channel_kobj (initialization of the primary
channel's kobject happens later)
Above code flow is sequential and the warning is always reproducible in
this path.
Fixes: 9ab877a6ccf8 ("uio_hv_generic: make ring buffer attribute for primary channel")
Cc: stable(a)kernel.org
Suggested-by: Saurabh Sengar <ssengar(a)linux.microsoft.com>
Suggested-by: Michael Kelley <mhklinux(a)outlook.com>
Reviewed-by: Michael Kelley <mhklinux(a)outlook.com>
Tested-by: Michael Kelley <mhklinux(a)outlook.com>
Reviewed-by: Dexuan Cui <decui(a)microsoft.com>
Signed-off-by: Naman Jain <namjain(a)linux.microsoft.com>
Link: https://lore.kernel.org/r/20250502074811.2022-2-namjain@linux.microsoft.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 29780f3a7478..0b450e53161e 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -477,4 +477,10 @@ static inline int hv_debug_add_dev_dir(struct hv_device *dev)
#endif /* CONFIG_HYPERV_TESTING */
+/* Create and remove sysfs entry for memory mapped ring buffers for a channel */
+int hv_create_ring_sysfs(struct vmbus_channel *channel,
+ int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
+ struct vm_area_struct *vma));
+int hv_remove_ring_sysfs(struct vmbus_channel *channel);
+
#endif /* _HYPERV_VMBUS_H */
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 8d3cff42bdbb..0f16a83cc2d6 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1802,6 +1802,27 @@ static ssize_t subchannel_id_show(struct vmbus_channel *channel,
}
static VMBUS_CHAN_ATTR_RO(subchannel_id);
+static int hv_mmap_ring_buffer_wrapper(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+ struct vmbus_channel *channel = container_of(kobj, struct vmbus_channel, kobj);
+
+ /*
+ * hv_(create|remove)_ring_sysfs implementation ensures that mmap_ring_buffer
+ * is not NULL.
+ */
+ return channel->mmap_ring_buffer(channel, vma);
+}
+
+static struct bin_attribute chan_attr_ring_buffer = {
+ .attr = {
+ .name = "ring",
+ .mode = 0600,
+ },
+ .size = 2 * SZ_2M,
+ .mmap = hv_mmap_ring_buffer_wrapper,
+};
static struct attribute *vmbus_chan_attrs[] = {
&chan_attr_out_mask.attr,
&chan_attr_in_mask.attr,
@@ -1821,6 +1842,11 @@ static struct attribute *vmbus_chan_attrs[] = {
NULL
};
+static struct bin_attribute *vmbus_chan_bin_attrs[] = {
+ &chan_attr_ring_buffer,
+ NULL
+};
+
/*
* Channel-level attribute_group callback function. Returns the permission for
* each attribute, and returns 0 if an attribute is not visible.
@@ -1841,9 +1867,24 @@ static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj,
return attr->mode;
}
+static umode_t vmbus_chan_bin_attr_is_visible(struct kobject *kobj,
+ const struct bin_attribute *attr, int idx)
+{
+ const struct vmbus_channel *channel =
+ container_of(kobj, struct vmbus_channel, kobj);
+
+ /* Hide ring attribute if channel's ring_sysfs_visible is set to false */
+ if (attr == &chan_attr_ring_buffer && !channel->ring_sysfs_visible)
+ return 0;
+
+ return attr->attr.mode;
+}
+
static const struct attribute_group vmbus_chan_group = {
.attrs = vmbus_chan_attrs,
- .is_visible = vmbus_chan_attr_is_visible
+ .bin_attrs = vmbus_chan_bin_attrs,
+ .is_visible = vmbus_chan_attr_is_visible,
+ .is_bin_visible = vmbus_chan_bin_attr_is_visible,
};
static const struct kobj_type vmbus_chan_ktype = {
@@ -1851,6 +1892,63 @@ static const struct kobj_type vmbus_chan_ktype = {
.release = vmbus_chan_release,
};
+/**
+ * hv_create_ring_sysfs() - create "ring" sysfs entry corresponding to ring buffers for a channel.
+ * @channel: Pointer to vmbus_channel structure
+ * @hv_mmap_ring_buffer: function pointer for initializing the function to be called on mmap of
+ * channel's "ring" sysfs node, which is for the ring buffer of that channel.
+ * Function pointer is of below type:
+ * int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
+ * struct vm_area_struct *vma))
+ * This has a pointer to the channel and a pointer to vm_area_struct,
+ * used for mmap, as arguments.
+ *
+ * Sysfs node for ring buffer of a channel is created along with other fields, however its
+ * visibility is disabled by default. Sysfs creation needs to be controlled when the use-case
+ * is running.
+ * For example, HV_NIC device is used either by uio_hv_generic or hv_netvsc at any given point of
+ * time, and "ring" sysfs is needed only when uio_hv_generic is bound to that device. To avoid
+ * exposing the ring buffer by default, this function is reponsible to enable visibility of
+ * ring for userspace to use.
+ * Note: Race conditions can happen with userspace and it is not encouraged to create new
+ * use-cases for this. This was added to maintain backward compatibility, while solving
+ * one of the race conditions in uio_hv_generic while creating sysfs.
+ *
+ * Returns 0 on success or error code on failure.
+ */
+int hv_create_ring_sysfs(struct vmbus_channel *channel,
+ int (*hv_mmap_ring_buffer)(struct vmbus_channel *channel,
+ struct vm_area_struct *vma))
+{
+ struct kobject *kobj = &channel->kobj;
+
+ channel->mmap_ring_buffer = hv_mmap_ring_buffer;
+ channel->ring_sysfs_visible = true;
+
+ return sysfs_update_group(kobj, &vmbus_chan_group);
+}
+EXPORT_SYMBOL_GPL(hv_create_ring_sysfs);
+
+/**
+ * hv_remove_ring_sysfs() - remove ring sysfs entry corresponding to ring buffers for a channel.
+ * @channel: Pointer to vmbus_channel structure
+ *
+ * Hide "ring" sysfs for a channel by changing its is_visible attribute and updating sysfs group.
+ *
+ * Returns 0 on success or error code on failure.
+ */
+int hv_remove_ring_sysfs(struct vmbus_channel *channel)
+{
+ struct kobject *kobj = &channel->kobj;
+ int ret;
+
+ channel->ring_sysfs_visible = false;
+ ret = sysfs_update_group(kobj, &vmbus_chan_group);
+ channel->mmap_ring_buffer = NULL;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hv_remove_ring_sysfs);
+
/*
* vmbus_add_channel_kobj - setup a sub-directory under device/channels
*/
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 1b19b5647495..69c1df0f4ca5 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -131,15 +131,12 @@ static void hv_uio_rescind(struct vmbus_channel *channel)
vmbus_device_unregister(channel->device_obj);
}
-/* Sysfs API to allow mmap of the ring buffers
+/* Function used for mmap of ring buffer sysfs interface.
* The ring buffer is allocated as contiguous memory by vmbus_open
*/
-static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj,
- const struct bin_attribute *attr,
- struct vm_area_struct *vma)
+static int
+hv_uio_ring_mmap(struct vmbus_channel *channel, struct vm_area_struct *vma)
{
- struct vmbus_channel *channel
- = container_of(kobj, struct vmbus_channel, kobj);
void *ring_buffer = page_address(channel->ringbuffer_page);
if (channel->state != CHANNEL_OPENED_STATE)
@@ -149,15 +146,6 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj,
channel->ringbuffer_pagecount << PAGE_SHIFT);
}
-static const struct bin_attribute ring_buffer_bin_attr = {
- .attr = {
- .name = "ring",
- .mode = 0600,
- },
- .size = 2 * SZ_2M,
- .mmap = hv_uio_ring_mmap,
-};
-
/* Callback from VMBUS subsystem when new channel created. */
static void
hv_uio_new_channel(struct vmbus_channel *new_sc)
@@ -178,8 +166,7 @@ hv_uio_new_channel(struct vmbus_channel *new_sc)
/* Disable interrupts on sub channel */
new_sc->inbound.ring_buffer->interrupt_mask = 1;
set_channel_read_mode(new_sc, HV_CALL_ISR);
-
- ret = sysfs_create_bin_file(&new_sc->kobj, &ring_buffer_bin_attr);
+ ret = hv_create_ring_sysfs(new_sc, hv_uio_ring_mmap);
if (ret) {
dev_err(device, "sysfs create ring bin file failed; %d\n", ret);
vmbus_close(new_sc);
@@ -350,10 +337,18 @@ hv_uio_probe(struct hv_device *dev,
goto fail_close;
}
- ret = sysfs_create_bin_file(&channel->kobj, &ring_buffer_bin_attr);
- if (ret)
- dev_notice(&dev->device,
- "sysfs create ring bin file failed; %d\n", ret);
+ /*
+ * This internally calls sysfs_update_group, which returns a non-zero value if it executes
+ * before sysfs_create_group. This is expected as the 'ring' will be created later in
+ * vmbus_device_register() -> vmbus_add_channel_kobj(). Thus, no need to check the return
+ * value and print warning.
+ *
+ * Creating/exposing sysfs in driver probe is not encouraged as it can lead to race
+ * conditions with userspace. For backward compatibility, "ring" sysfs could not be removed
+ * or decoupled from uio_hv_generic probe. Userspace programs can make use of inotify
+ * APIs to make sure that ring is created.
+ */
+ hv_create_ring_sysfs(channel, hv_uio_ring_mmap);
hv_set_drvdata(dev, pdata);
@@ -375,7 +370,7 @@ hv_uio_remove(struct hv_device *dev)
if (!pdata)
return;
- sysfs_remove_bin_file(&dev->channel->kobj, &ring_buffer_bin_attr);
+ hv_remove_ring_sysfs(dev->channel);
uio_unregister_device(&pdata->info);
hv_uio_cleanup(dev, pdata);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 675959fb97ba..d6ffe01962c2 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1002,6 +1002,12 @@ struct vmbus_channel {
/* The max size of a packet on this channel */
u32 max_pkt_size;
+
+ /* function to mmap ring buffer memory to the channel's sysfs ring attribute */
+ int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma);
+
+ /* boolean to control visibility of sysfs for ring buffer */
+ bool ring_sysfs_visible;
};
#define lock_requestor(channel, flags) \
From: Wayne Lin <wayne.lin(a)amd.com>
[ Upstream commit fcf6a49d79923a234844b8efe830a61f3f0584e4 ]
[Why]
When unplug one of monitors connected after mst hub, encounter null pointer dereference.
It's due to dc_sink get released immediately in early_unregister() or detect_ctx(). When
commit new state which directly referring to info stored in dc_sink will cause null pointer
dereference.
[how]
Remove redundant checking condition. Relevant condition should already be covered by checking
if dsc_aux is null or not. Also reset dsc_aux to NULL when the connector is disconnected.
Reviewed-by: Jerry Zuo <jerry.zuo(a)amd.com>
Acked-by: Zaeem Mohamed <zaeem.mohamed(a)amd.com>
Signed-off-by: Wayne Lin <wayne.lin(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
[The deleted codes in this fix is introduced by commit b9b5a82c5321
("drm/amd/display: Fix DSC-re-computing") after 6.11.]
Signed-off-by: Jianqi Ren <jianqi.ren.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Verified the build test
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 5eb994ed5471..6bb590bc7c19 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -183,6 +183,8 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector)
dc_sink_release(dc_sink);
aconnector->dc_sink = NULL;
aconnector->edid = NULL;
+ aconnector->dsc_aux = NULL;
+ port->passthrough_aux = NULL;
}
aconnector->mst_status = MST_STATUS_DEFAULT;
@@ -487,6 +489,8 @@ dm_dp_mst_detect(struct drm_connector *connector,
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
aconnector->edid = NULL;
+ aconnector->dsc_aux = NULL;
+ port->passthrough_aux = NULL;
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD,
--
2.34.1
From: Wei Fang <wei.fang(a)nxp.com>
[ Upstream commit c2e0c58b25a0a0c37ec643255558c5af4450c9f5 ]
There is a deadlock issue found in sungem driver, please refer to the
commit ac0a230f719b ("eth: sungem: remove .ndo_poll_controller to avoid
deadlocks"). The root cause of the issue is that netpoll is in atomic
context and disable_irq() is called by .ndo_poll_controller interface
of sungem driver, however, disable_irq() might sleep. After analyzing
the implementation of fec_poll_controller(), the fec driver should have
the same issue. Due to the fec driver uses NAPI for TX completions, the
.ndo_poll_controller is unnecessary to be implemented in the fec driver,
so fec_poll_controller() can be safely removed.
Fixes: 7f5c6addcdc0 ("net/fec: add poll controller function for fec nic")
Signed-off-by: Wei Fang <wei.fang(a)nxp.com>
Link: https://lore.kernel.org/r/20240511062009.652918-1-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
[Minor context change fixed]
Signed-off-by: Feng Liu <Feng.Liu3(a)windriver.com>
Signed-off-by: He Zhe <Zhe.He(a)windriver.com>
---
Verified the build test.
---
drivers/net/ethernet/freescale/fec_main.c | 26 -----------------------
1 file changed, 26 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 8e30e999456d..01579719f4ce 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3255,29 +3255,6 @@ fec_set_mac_address(struct net_device *ndev, void *p)
return 0;
}
-#ifdef CONFIG_NET_POLL_CONTROLLER
-/**
- * fec_poll_controller - FEC Poll controller function
- * @dev: The FEC network adapter
- *
- * Polled functionality used by netconsole and others in non interrupt mode
- *
- */
-static void fec_poll_controller(struct net_device *dev)
-{
- int i;
- struct fec_enet_private *fep = netdev_priv(dev);
-
- for (i = 0; i < FEC_IRQ_NUM; i++) {
- if (fep->irq[i] > 0) {
- disable_irq(fep->irq[i]);
- fec_enet_interrupt(fep->irq[i], dev);
- enable_irq(fep->irq[i]);
- }
- }
-}
-#endif
-
static inline void fec_enet_set_netdev_features(struct net_device *netdev,
netdev_features_t features)
{
@@ -3351,9 +3328,6 @@ static const struct net_device_ops fec_netdev_ops = {
.ndo_tx_timeout = fec_timeout,
.ndo_set_mac_address = fec_set_mac_address,
.ndo_do_ioctl = fec_enet_ioctl,
-#ifdef CONFIG_NET_POLL_CONTROLLER
- .ndo_poll_controller = fec_poll_controller,
-#endif
.ndo_set_features = fec_set_features,
};
--
2.34.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 94cff94634e506a4a44684bee1875d2dbf782722
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051256-safeguard-purchase-6c4e@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 94cff94634e506a4a44684bee1875d2dbf782722 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Date: Fri, 4 Apr 2025 15:31:16 +0200
Subject: [PATCH] clocksource/i8253: Use raw_spinlock_irqsave() in
clockevent_i8253_disable()
On x86 during boot, clockevent_i8253_disable() can be invoked via
x86_late_time_init -> hpet_time_init() -> pit_timer_init() which happens
with enabled interrupts.
If some of the old i8253 hardware is actually used then lockdep will notice
that i8253_lock is used in hard interrupt context. This causes lockdep to
complain because it observed the lock being acquired with interrupts
enabled and in hard interrupt context.
Make clockevent_i8253_disable() acquire the lock with
raw_spinlock_irqsave() to cure this.
[ tglx: Massage change log and use guard() ]
Fixes: c8c4076723dac ("x86/timer: Skip PIT initialization on modern chipsets")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20250404133116.p-XRWJXf@linutronix.de
diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index 39f7c2d736d1..b603c25f3dfa 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -103,7 +103,7 @@ int __init clocksource_i8253_init(void)
#ifdef CONFIG_CLKEVT_I8253
void clockevent_i8253_disable(void)
{
- raw_spin_lock(&i8253_lock);
+ guard(raw_spinlock_irqsave)(&i8253_lock);
/*
* Writing the MODE register should stop the counter, according to
@@ -132,8 +132,6 @@ void clockevent_i8253_disable(void)
outb_p(0, PIT_CH0);
outb_p(0x30, PIT_MODE);
-
- raw_spin_unlock(&i8253_lock);
}
static int pit_shutdown(struct clock_event_device *evt)
LoongArch's toolchain may change the default code model from normal to
medium. This is unnecessary for kernel, and generates some relocations
which cannot be handled by the module loader. So explicitly specify the
code model to normal in Makefile (for Rust 'normal' is 'small').
Cc: stable(a)vger.kernel.org
Tested-by: Haiyong Sun <sunhaiyong(a)loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
We may use new toolchain to build 6.1/6.6 LTS, backport it to avoid
problems.
arch/loongarch/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index a74bbcb05ee1..f2966745b058 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -55,7 +55,7 @@ endif
ifdef CONFIG_64BIT
ld-emul = $(64bit-emul)
-cflags-y += -mabi=lp64s
+cflags-y += -mabi=lp64s -mcmodel=normal
endif
cflags-y += -pipe -msoft-float
--
2.47.1
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x ab00ddd802f80e31fc9639c652d736fe3913feae
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025051243-poster-expiring-81bc@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ab00ddd802f80e31fc9639c652d736fe3913feae Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang(a)linux.alibaba.com>
Date: Wed, 23 Apr 2025 18:36:45 +0800
Subject: [PATCH] selftests/mm: compaction_test: support platform with huge
mount of memory
When running mm selftest to verify mm patches, 'compaction_test' case
failed on an x86 server with 1TB memory. And the root cause is that it
has too much free memory than what the test supports.
The test case tries to allocate 100000 huge pages, which is about 200 GB
for that x86 server, and when it succeeds, it expects it's large than 1/3
of 80% of the free memory in system. This logic only works for platform
with 750 GB ( 200 / (1/3) / 80% ) or less free memory, and may raise false
alarm for others.
Fix it by changing the fixed page number to self-adjustable number
according to the real number of free memory.
Link: https://lkml.kernel.org/r/20250423103645.2758-1-feng.tang@linux.alibaba.com
Fixes: bd67d5c15cc1 ("Test compaction of mlocked memory")
Signed-off-by: Feng Tang <feng.tang(a)linux.alibaba.com>
Acked-by: Dev Jain <dev.jain(a)arm.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang(a)inux.alibaba.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Sri Jayaramappa <sjayaram(a)akamai.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index 2c3a0eb6b22d..9bc4591c7b16 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
int compaction_index = 0;
char nr_hugepages[20] = {0};
char init_nr_hugepages[24] = {0};
+ char target_nr_hugepages[24] = {0};
+ int slen;
snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
"%lu", initial_nr_hugepages);
@@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
goto out;
}
- /* Request a large number of huge pages. The Kernel will allocate
- as much as it can */
- if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
- ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
+ /*
+ * Request huge pages for about half of the free memory. The Kernel
+ * will allocate as much as it can, and we expect it will get at least 1/3
+ */
+ nr_hugepages_ul = mem_free / hugepage_size / 2;
+ snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
+ "%lu", nr_hugepages_ul);
+
+ slen = strlen(target_nr_hugepages);
+ if (write(fd, target_nr_hugepages, slen) != slen) {
+ ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
+ nr_hugepages_ul, strerror(errno));
goto close_fd;
}
Hi Greg
Two patches for synaptics driver seems to not have been backported to 5.4
but have been added to 5.10 and later versions. These patches are:
1. https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git/tre…
2. https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git/tre…
I'm sending them after rebasing them to 5.4.
Thanks
RESEND: since I forgot to Cc stable
Aditya Garg (1):
Input: synaptics - enable InterTouch on TUXEDO InfinityBook Pro 14 v5
Dmitry Torokhov (1):
Input: synaptics - enable SMBus for HP Elitebook 850 G1
drivers/input/mouse/synaptics.c | 2 ++
1 file changed, 2 insertions(+)
--
2.49.0
From: Puranjay Mohan <puranjay(a)kernel.org>
[ Upstream commit 19d3c179a37730caf600a97fed3794feac2b197b ]
When BPF_TRAMP_F_CALL_ORIG is set, the trampoline calls
__bpf_tramp_enter() and __bpf_tramp_exit() functions, passing them
the struct bpf_tramp_image *im pointer as an argument in R0.
The trampoline generation code uses emit_addr_mov_i64() to emit
instructions for moving the bpf_tramp_image address into R0, but
emit_addr_mov_i64() assumes the address to be in the vmalloc() space
and uses only 48 bits. Because bpf_tramp_image is allocated using
kzalloc(), its address can use more than 48-bits, in this case the
trampoline will pass an invalid address to __bpf_tramp_enter/exit()
causing a kernel crash.
Fix this by using emit_a64_mov_i64() in place of emit_addr_mov_i64()
as it can work with addresses that are greater than 48-bits.
Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64")
Signed-off-by: Puranjay Mohan <puranjay(a)kernel.org>
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Closes: https://lore.kernel.org/all/SJ0PR15MB461564D3F7E7A763498CA6A8CBDB2@SJ0PR15M…
Link: https://lore.kernel.org/bpf/20240711151838.43469-1-puranjay@kernel.org
[Minor context change fixed.]
Signed-off-by: Bin Lan <bin.lan.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Build test passed.
---
arch/arm64/net/bpf_jit_comp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c04ace8f4843..3168343815b3 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1893,7 +1893,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
+ emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_enter, ctx);
}
@@ -1937,7 +1937,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (flags & BPF_TRAMP_F_CALL_ORIG) {
im->ip_epilogue = ctx->image + ctx->idx;
- emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
+ emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_exit, ctx);
}
--
2.34.1
From: Puranjay Mohan <puranjay(a)kernel.org>
[ Upstream commit 19d3c179a37730caf600a97fed3794feac2b197b ]
When BPF_TRAMP_F_CALL_ORIG is set, the trampoline calls
__bpf_tramp_enter() and __bpf_tramp_exit() functions, passing them
the struct bpf_tramp_image *im pointer as an argument in R0.
The trampoline generation code uses emit_addr_mov_i64() to emit
instructions for moving the bpf_tramp_image address into R0, but
emit_addr_mov_i64() assumes the address to be in the vmalloc() space
and uses only 48 bits. Because bpf_tramp_image is allocated using
kzalloc(), its address can use more than 48-bits, in this case the
trampoline will pass an invalid address to __bpf_tramp_enter/exit()
causing a kernel crash.
Fix this by using emit_a64_mov_i64() in place of emit_addr_mov_i64()
as it can work with addresses that are greater than 48-bits.
Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64")
Signed-off-by: Puranjay Mohan <puranjay(a)kernel.org>
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Closes: https://lore.kernel.org/all/SJ0PR15MB461564D3F7E7A763498CA6A8CBDB2@SJ0PR15M…
Link: https://lore.kernel.org/bpf/20240711151838.43469-1-puranjay@kernel.org
[Minor context change fixed.]
Signed-off-by: Bin Lan <bin.lan.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Build test passed.
---
arch/arm64/net/bpf_jit_comp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 5074bd1d37b5..7c5156e7d31e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1952,7 +1952,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
+ emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_enter, ctx);
}
@@ -1996,7 +1996,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (flags & BPF_TRAMP_F_CALL_ORIG) {
im->ip_epilogue = ctx->image + ctx->idx;
- emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
+ emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_exit, ctx);
}
--
2.34.1
From: Zi Yan <ziy(a)nvidia.com>
nr_failed was missing the large folio splits from migrate_pages_batch()
and can cause a mismatch between migrate_pages() return value and the
number of not migrated pages, i.e., when the return value of
migrate_pages() is 0, there are still pages left in the from page list.
It will happen when a non-PMD THP large folio fails to migrate due to
-ENOMEM and is split successfully but not all the split pages are not
migrated, migrate_pages_batch() would return non-zero, but
astats.nr_thp_split = 0. nr_failed would be 0 and returned to the caller
of migrate_pages(), but the not migrated pages are left in the from page
list without being added back to LRU lists.
Fix it by adding a new nr_split counter for large folio splits and adding
it to nr_failed in migrate_page_sync() after migrate_pages_batch() is
done.
Link: https://lkml.kernel.org/r/20231017163129.2025214-1-zi.yan@sent.com
Fixes: 2ef7dbb26990 ("migrate_pages: try migrate in batch asynchronously firstly")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: Huang Ying <ying.huang(a)intel.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
This patch has a Fixes tag and should be backported to 6.6, I don't know
why hasn't bakported.
mm/migrate.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 1004b1def1c2..4ed470885217 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1504,6 +1504,7 @@ struct migrate_pages_stats {
int nr_thp_succeeded; /* THP migrated successfully */
int nr_thp_failed; /* THP failed to be migrated */
int nr_thp_split; /* THP split before migrating */
+ int nr_split; /* Large folio (include THP) split before migrating */
};
/*
@@ -1623,6 +1624,7 @@ static int migrate_pages_batch(struct list_head *from,
int nr_retry_pages = 0;
int pass = 0;
bool is_thp = false;
+ bool is_large = false;
struct folio *folio, *folio2, *dst = NULL, *dst2;
int rc, rc_saved = 0, nr_pages;
LIST_HEAD(unmap_folios);
@@ -1638,7 +1640,8 @@ static int migrate_pages_batch(struct list_head *from,
nr_retry_pages = 0;
list_for_each_entry_safe(folio, folio2, from, lru) {
- is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio);
+ is_large = folio_test_large(folio);
+ is_thp = is_large && folio_test_pmd_mappable(folio);
nr_pages = folio_nr_pages(folio);
cond_resched();
@@ -1658,6 +1661,7 @@ static int migrate_pages_batch(struct list_head *from,
stats->nr_thp_failed++;
if (!try_split_folio(folio, split_folios)) {
stats->nr_thp_split++;
+ stats->nr_split++;
continue;
}
stats->nr_failed_pages += nr_pages;
@@ -1686,11 +1690,12 @@ static int migrate_pages_batch(struct list_head *from,
nr_failed++;
stats->nr_thp_failed += is_thp;
/* Large folio NUMA faulting doesn't split to retry. */
- if (folio_test_large(folio) && !nosplit) {
+ if (is_large && !nosplit) {
int ret = try_split_folio(folio, split_folios);
if (!ret) {
stats->nr_thp_split += is_thp;
+ stats->nr_split += is_large;
break;
} else if (reason == MR_LONGTERM_PIN &&
ret == -EAGAIN) {
@@ -1836,6 +1841,7 @@ static int migrate_pages_sync(struct list_head *from, new_folio_t get_new_folio,
stats->nr_succeeded += astats.nr_succeeded;
stats->nr_thp_succeeded += astats.nr_thp_succeeded;
stats->nr_thp_split += astats.nr_thp_split;
+ stats->nr_split += astats.nr_split;
if (rc < 0) {
stats->nr_failed_pages += astats.nr_failed_pages;
stats->nr_thp_failed += astats.nr_thp_failed;
@@ -1843,7 +1849,11 @@ static int migrate_pages_sync(struct list_head *from, new_folio_t get_new_folio,
return rc;
}
stats->nr_thp_failed += astats.nr_thp_split;
- nr_failed += astats.nr_thp_split;
+ /*
+ * Do not count rc, as pages will be retried below.
+ * Count nr_split only, since it includes nr_thp_split.
+ */
+ nr_failed += astats.nr_split;
/*
* Fall back to migrate all failed folios one by one synchronously. All
* failed folios except split THPs will be retried, so their failure
--
2.47.1