During suspend/resume process all connectors are explicitly disabled and
then reenabled. However resume fails because of the connector_status check:
[dpu error]connector not connected 3
[drm:drm_mode_config_helper_resume [drm_kms_helper]] *ERROR* Failed to resume (-22)
It doesn't make sense to check for the Writeback connected status (and
other drivers don't perform such check), so drop the check.
It wasn't a problem before the commit 71174f362d67 ("drm/msm/dpu: move
writeback's atomic_check to dpu_writeback.c"), since encoder's
atomic_check() is called under a different conditions that the
connector's atomic_check() (e.g. it is not called if there is no
connected CRTC or if the corresponding connector is not a part of the
new state).
Fixes: 71174f362d67 ("drm/msm/dpu: move writeback's atomic_check to dpu_writeback.c")
Cc: stable(a)vger.kernel.org
Reported-by: Leonard Lausen <leonard(a)lausen.nl>
Closes: https://gitlab.freedesktop.org/drm/msm/-/issues/57
Tested-by: Leonard Lausen <leonard(a)lausen.nl> # on sc7180 lazor
Reported-by: György Kurucz <me(a)kuruczgy.com>
Link: https://lore.kernel.org/all/b70a4d1d-f98f-4169-942c-cb9006a42b40@kuruczgy.c…
Reported-by: Johan Hovold <johan+linaro(a)kernel.org>
Link: https://lore.kernel.org/all/ZzyYI8KkWK36FfXf@hovoldconsulting.com/
Tested-by: György Kurucz <me(a)kuruczgy.com>
Reviewed-by: Johan Hovold <johan+linaro(a)kernel.org>
Tested-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
---
Leonard Lausen reported an issue with suspend/resume of the sc7180
devices. Fix the WB atomic check, which caused the issue.
---
Changes in v4:
- Epanded commit message (Johan)
- Link to v3: https://lore.kernel.org/r/20241208-dpu-fix-wb-v3-1-a1de69ce4a1b@linaro.org
Changes in v3:
- Rebased on top of msm-fixes
- Link to v2: https://lore.kernel.org/r/20240802-dpu-fix-wb-v2-0-7eac9eb8e895@linaro.org
Changes in v2:
- Reworked the writeback to just drop the connector->status check.
- Expanded commit message for the debugging patch.
- Link to v1: https://lore.kernel.org/r/20240709-dpu-fix-wb-v1-0-448348bfd4cb@linaro.org
---
drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c
index 16f144cbc0c986ee266412223d9e605b01f9fb8c..8ff496082902b1ee713e806140f39b4730ed256a 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c
@@ -42,9 +42,6 @@ static int dpu_wb_conn_atomic_check(struct drm_connector *connector,
if (!conn_state || !conn_state->connector) {
DPU_ERROR("invalid connector state\n");
return -EINVAL;
- } else if (conn_state->connector->status != connector_status_connected) {
- DPU_ERROR("connector not connected %d\n", conn_state->connector->status);
- return -EINVAL;
}
crtc = conn_state->crtc;
---
base-commit: 86313a9cd152330c634b25d826a281c6a002eb77
change-id: 20240709-dpu-fix-wb-6cd57e3eb182
Best regards,
--
Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org>
From: Joshua Washington <joshwash(a)google.com>
This patch predicates the enabling and disabling of XSK pools on the
existence of queues. As it stands, if the interface is down, disabling
or enabling XSK pools would result in a crash, as the RX queue pointer
would be NULL. XSK pool registration will occur as part of the next
interface up.
Similarly, xsk_wakeup needs be guarded against queues disappearing
while the function is executing, so a check against the
GVE_PRIV_FLAGS_NAPI_ENABLED flag is added to synchronize with the
disabling of the bit and the synchronize_net() in gve_turndown.
Fixes: fd8e40321a12 ("gve: Add AF_XDP zero-copy support for GQI-QPL format")
Cc: stable(a)vger.kernel.org
Signed-off-by: Joshua Washington <joshwash(a)google.com>
Signed-off-by: Praveen Kaligineedi <pkaligineedi(a)google.com>
Reviewed-by: Praveen Kaligineedi <pkaligineedi(a)google.com>
Reviewed-by: Shailend Chand <shailend(a)google.com>
Reviewed-by: Willem de Bruijn <willemb(a)google.com>
---
drivers/net/ethernet/google/gve/gve_main.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 5d7b0cc59959..e4e8ff4f9f80 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -1623,8 +1623,8 @@ static int gve_xsk_pool_enable(struct net_device *dev,
if (err)
return err;
- /* If XDP prog is not installed, return */
- if (!priv->xdp_prog)
+ /* If XDP prog is not installed or interface is down, return. */
+ if (!priv->xdp_prog || !netif_running(dev))
return 0;
rx = &priv->rx[qid];
@@ -1669,21 +1669,16 @@ static int gve_xsk_pool_disable(struct net_device *dev,
if (qid >= priv->rx_cfg.num_queues)
return -EINVAL;
- /* If XDP prog is not installed, unmap DMA and return */
- if (!priv->xdp_prog)
+ /* If XDP prog is not installed or interface is down, unmap DMA and
+ * return.
+ */
+ if (!priv->xdp_prog || !netif_running(dev))
goto done;
- tx_qid = gve_xdp_tx_queue_id(priv, qid);
- if (!netif_running(dev)) {
- priv->rx[qid].xsk_pool = NULL;
- xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
- priv->tx[tx_qid].xsk_pool = NULL;
- goto done;
- }
-
napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
napi_disable(napi_rx); /* make sure current rx poll is done */
+ tx_qid = gve_xdp_tx_queue_id(priv, qid);
napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
napi_disable(napi_tx); /* make sure current tx poll is done */
@@ -1711,6 +1706,9 @@ static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
struct gve_priv *priv = netdev_priv(dev);
int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
+ if (!gve_get_napi_enabled(priv))
+ return -ENETDOWN;
+
if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
return -EINVAL;
--
2.47.1.613.gc27f4b7a9f-goog
From: Tvrtko Ursulin <tvrtko.ursulin(a)igalia.com>
After commit
746ae46c1113 ("drm/sched: Mark scheduler work queues with WQ_MEM_RECLAIM")
amdgpu started seeing the following warning:
[ ] workqueue: WQ_MEM_RECLAIM sdma0:drm_sched_run_job_work [gpu_sched] is flushing !WQ_MEM_RECLAIM events:amdgpu_device_delay_enable_gfx_off [amdgpu]
...
[ ] Workqueue: sdma0 drm_sched_run_job_work [gpu_sched]
...
[ ] Call Trace:
[ ] <TASK>
...
[ ] ? check_flush_dependency+0xf5/0x110
...
[ ] cancel_delayed_work_sync+0x6e/0x80
[ ] amdgpu_gfx_off_ctrl+0xab/0x140 [amdgpu]
[ ] amdgpu_ring_alloc+0x40/0x50 [amdgpu]
[ ] amdgpu_ib_schedule+0xf4/0x810 [amdgpu]
[ ] ? drm_sched_run_job_work+0x22c/0x430 [gpu_sched]
[ ] amdgpu_job_run+0xaa/0x1f0 [amdgpu]
[ ] drm_sched_run_job_work+0x257/0x430 [gpu_sched]
[ ] process_one_work+0x217/0x720
...
[ ] </TASK>
The intent of the verifcation done in check_flush_depedency is to ensure
forward progress during memory reclaim, by flagging cases when either a
memory reclaim process, or a memory reclaim work item is flushed from a
context not marked as memory reclaim safe.
This is correct when flushing, but when called from the
cancel(_delayed)_work_sync() paths it is a false positive because work is
either already running, or will not be running at all. Therefore
cancelling it is safe and we can relax the warning criteria by letting the
helper know of the calling context.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin(a)igalia.com>
Fixes: fca839c00a12 ("workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue")
References: 746ae46c1113 ("drm/sched: Mark scheduler work queues with WQ_MEM_RECLAIM")
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Lai Jiangshan <jiangshanlai(a)gmail.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: Christian König <christian.koenig(a)amd.com
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v4.5+
---
kernel/workqueue.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9949ffad8df0..7abba81296cd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3680,23 +3680,27 @@ void workqueue_softirq_dead(unsigned int cpu)
* check_flush_dependency - check for flush dependency sanity
* @target_wq: workqueue being flushed
* @target_work: work item being flushed (NULL for workqueue flushes)
+ * @from_cancel: are we called from the work cancel path
*
* %current is trying to flush the whole @target_wq or @target_work on it.
- * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
- * reclaiming memory or running on a workqueue which doesn't have
- * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
- * a deadlock.
+ * If this is not the cancel path (which implies work being flushed is either
+ * already running, or will not be at all), check if @target_wq doesn't have
+ * %WQ_MEM_RECLAIM and verify that %current is not reclaiming memory or running
+ * on a workqueue which doesn't have %WQ_MEM_RECLAIM as that can break forward-
+ * progress guarantee leading to a deadlock.
*/
static void check_flush_dependency(struct workqueue_struct *target_wq,
- struct work_struct *target_work)
+ struct work_struct *target_work,
+ bool from_cancel)
{
- work_func_t target_func = target_work ? target_work->func : NULL;
+ work_func_t target_func;
struct worker *worker;
- if (target_wq->flags & WQ_MEM_RECLAIM)
+ if (from_cancel || target_wq->flags & WQ_MEM_RECLAIM)
return;
worker = current_wq_worker();
+ target_func = target_work ? target_work->func : NULL;
WARN_ONCE(current->flags & PF_MEMALLOC,
"workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
@@ -3966,7 +3970,7 @@ void __flush_workqueue(struct workqueue_struct *wq)
list_add_tail(&this_flusher.list, &wq->flusher_overflow);
}
- check_flush_dependency(wq, NULL);
+ check_flush_dependency(wq, NULL, false);
mutex_unlock(&wq->mutex);
@@ -4141,7 +4145,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
}
wq = pwq->wq;
- check_flush_dependency(wq, work);
+ check_flush_dependency(wq, work, from_cancel);
insert_wq_barrier(pwq, barr, work, worker);
raw_spin_unlock_irq(&pool->lock);
--
2.47.1
Encoding file handles is usually performed by a filesystem >encode_fh()
method that may fail for various reasons.
The legacy users of exportfs_encode_fh(), namely, nfsd and
name_to_handle_at(2) syscall are ready to cope with the possibility
of failure to encode a file handle.
There are a few other users of exportfs_encode_{fh,fid}() that
currently have a WARN_ON() assertion when ->encode_fh() fails.
Relax those assertions because they are wrong.
The second linked bug report states commit 16aac5ad1fa9 ("ovl: support
encoding non-decodable file handles") in v6.6 as the regressing commit,
but this is not accurate.
The aforementioned commit only increases the chances of the assertion
and allows triggering the assertion with the reproducer using overlayfs,
inotify and drop_caches.
Triggering this assertion was always possible with other filesystems and
other reasons of ->encode_fh() failures and more particularly, it was
also possible with the exact same reproducer using overlayfs that is
mounted with options index=on,nfs_export=on also on kernels < v6.6.
Therefore, I am not listing the aforementioned commit as a Fixes commit.
Backport hint: this patch will have a trivial conflict applying to
v6.6.y, and other trivial conflicts applying to stable kernels < v6.6.
Reported-by: syzbot+ec07f6f5ce62b858579f(a)syzkaller.appspotmail.com
Tested-by: syzbot+ec07f6f5ce62b858579f(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-unionfs/671fd40c.050a0220.4735a.024f.GAE@goog…
Reported-by: Dmitry Safonov <dima(a)arista.com>
Closes: https://lore.kernel.org/linux-fsdevel/CAGrbwDTLt6drB9eaUagnQVgdPBmhLfqqxAf3…
Cc: stable(a)vger.kernel.org
Signed-off-by: Amir Goldstein <amir73il(a)gmail.com>
---
Christian,
I could have sumbitted two independant patches to relax the assertion
in fsnotify and overlayfs via fsnotify and overlayfs trees, but the
nature of the problem is the same and in both cases, the problem became
worse with the introduction of non-decodable file handles support,
so decided to fix them together and ask you to take the fix via the
vfs tree.
Please let you if you think it should be done differently.
Thanks,
Amir.
fs/notify/fdinfo.c | 4 +---
fs/overlayfs/copy_up.c | 5 ++---
2 files changed, 3 insertions(+), 6 deletions(-)
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index dec553034027e..e933f9c65d904 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -47,10 +47,8 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
size = f->handle_bytes >> 2;
ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size);
- if ((ret == FILEID_INVALID) || (ret < 0)) {
- WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
+ if ((ret == FILEID_INVALID) || (ret < 0))
return;
- }
f->handle_type = ret;
f->handle_bytes = size * sizeof(u32);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 3601ddfeddc2e..56eee9f23ea9a 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -442,9 +442,8 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
buflen = (dwords << 2);
err = -EIO;
- if (WARN_ON(fh_type < 0) ||
- WARN_ON(buflen > MAX_HANDLE_SZ) ||
- WARN_ON(fh_type == FILEID_INVALID))
+ if (fh_type < 0 || fh_type == FILEID_INVALID ||
+ WARN_ON(buflen > MAX_HANDLE_SZ))
goto out_err;
fh->fb.version = OVL_FH_VERSION;
--
2.34.1
Hi,
This series fixes the several suspend issues on Qcom platforms. Patch 1 fixes
the resume failure with spm_lvl=5 suspend on most of the Qcom platforms. For
this patch, I couldn't figure out the exact commit that caused the issue. So I
used the commit that introduced reinit support as a placeholder.
Patch 3 fixes the suspend issue on SM8550 and SM8650 platforms where UFS
PHY retention is not supported. Hence the default spm_lvl=3 suspend fails. So
this patch configures spm_lvl=5 as the default suspend level to force UFSHC/
device powerdown during suspend. This supersedes the previous series [1] that
tried to fix the issue in clock drivers.
This series is tested on Qcom SM8550 MTP and Qcom RB5 boards.
[1] https://lore.kernel.org/linux-arm-msm/20241107-ufs-clk-fix-v1-0-6032ff22a05…
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
---
Changes in v2:
- Changed 'ufs_qcom_drvdata::quirks' type to 'enum ufshcd_quirks'
- Collected tags
- Link to v1: https://lore.kernel.org/r/20241211-ufs-qcom-suspend-fix-v1-0-83ebbde76b1c@l…
---
Manivannan Sadhasivam (3):
scsi: ufs: qcom: Power off the PHY if it was already powered on in ufs_qcom_power_up_sequence()
scsi: ufs: qcom: Allow passing platform specific OF data
scsi: ufs: qcom: Power down the controller/device during system suspend for SM8550/SM8650 SoCs
drivers/ufs/core/ufshcd-priv.h | 6 ------
drivers/ufs/core/ufshcd.c | 1 -
drivers/ufs/host/ufs-qcom.c | 31 +++++++++++++++++++------------
drivers/ufs/host/ufs-qcom.h | 5 +++++
include/ufs/ufshcd.h | 2 --
5 files changed, 24 insertions(+), 21 deletions(-)
---
base-commit: 40384c840ea1944d7c5a392e8975ed088ecf0b37
change-id: 20241211-ufs-qcom-suspend-fix-5618e9c56d93
Best regards,
--
Manivannan Sadhasivam <manivannan.sadhasivam(a)linaro.org>
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
In the sh-sci driver, sci_ports[0] is used by earlycon. If the earlycon is
still active when sci_probe() is called and the new serial port is supposed
to map to sci_ports[0], return -EBUSY to prevent breaking the earlycon.
This situation should occurs in debug scenarios, and users should be
aware of the potential conflict.
Fixes: 0b0cced19ab1 ("serial: sh-sci: Add CONFIG_SERIAL_EARLYCON support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
---
drivers/tty/serial/sh-sci.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 373195995d3b..e12fbc71082a 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -158,6 +158,7 @@ struct sci_port {
bool has_rtscts;
bool autorts;
bool tx_occurred;
+ bool earlycon;
};
#define SCI_NPORTS CONFIG_SERIAL_SH_SCI_NR_UARTS
@@ -3443,6 +3444,7 @@ static int sci_probe_single(struct platform_device *dev,
static int sci_probe(struct platform_device *dev)
{
struct plat_sci_port *p;
+ struct resource *res;
struct sci_port *sp;
unsigned int dev_id;
int ret;
@@ -3472,6 +3474,26 @@ static int sci_probe(struct platform_device *dev)
}
sp = &sci_ports[dev_id];
+
+ /*
+ * In case:
+ * - the probed port alias is zero (as the one used by earlycon), and
+ * - the earlycon is still active (e.g., "earlycon keep_bootcon" in
+ * bootargs)
+ *
+ * defer the probe of this serial. This is a debug scenario and the user
+ * must be aware of it.
+ *
+ * Except when the probed port is the same as the earlycon port.
+ */
+
+ res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ if (sp->earlycon && res->start != sp->port.mapbase)
+ return dev_err_probe(&dev->dev, -EBUSY, "sci_port[0] is used by earlycon!\n");
+
platform_set_drvdata(dev, sp);
ret = sci_probe_single(dev, dev_id, p, sp);
@@ -3568,6 +3590,7 @@ static int __init early_console_setup(struct earlycon_device *device,
port_cfg.type = type;
sci_ports[0].cfg = &port_cfg;
sci_ports[0].params = sci_probe_regmap(&port_cfg);
+ sci_ports[0].earlycon = true;
port_cfg.scscr = sci_serial_in(&sci_ports[0].port, SCSCR);
sci_serial_out(&sci_ports[0].port, SCSCR,
SCSCR_RE | SCSCR_TE | port_cfg.scscr);
--
2.39.2
The reference count of the device incremented in device_initialize() is
not decremented when device_add() fails. Add a put_device() call before
returning from the function to decrement reference count for cleanup.
Or it could cause memory leak.
As comment of device_add() says, if device_add() succeeds, you should
call device_del() when you want to get rid of it. If device_add() has
not succeeded, use only put_device() to drop the reference count.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: c8e4c2397655 ("RDMA/srp: Rework the srp_add_port() error path")
Signed-off-by: Ma Ke <make_ruc2021(a)163.com>
---
drivers/infiniband/ulp/srp/ib_srp.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 2916e77f589b..7289ae0b83ac 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3978,7 +3978,6 @@ static struct srp_host *srp_add_port(struct srp_device *device, u32 port)
return host;
put_host:
- device_del(&host->dev);
put_device(&host->dev);
return NULL;
}
--
2.25.1
From: James Morse <james.morse(a)arm.com>
commit 6685f5d572c22e1003e7c0d089afe1c64340ab1f upstream.
commit 011e5f5bf529f ("arm64/cpufeature: Add remaining feature bits in
ID_AA64PFR0 register") exposed the MPAM field of AA64PFR0_EL1 to guests,
but didn't add trap handling. A previous patch supplied the missing trap
handling.
Existing VMs that have the MPAM field of ID_AA64PFR0_EL1 set need to
be migratable, but there is little point enabling the MPAM CPU
interface on new VMs until there is something a guest can do with it.
Clear the MPAM field from the guest's ID_AA64PFR0_EL1 and on hardware
that supports MPAM, politely ignore the VMMs attempts to set this bit.
Guests exposed to this bug have the sanitised value of the MPAM field,
so only the correct value needs to be ignored. This means the field
can continue to be used to block migration to incompatible hardware
(between MPAM=1 and MPAM=5), and the VMM can't rely on the field
being ignored.
Signed-off-by: James Morse <james.morse(a)arm.com>
Co-developed-by: Joey Gouly <joey.gouly(a)arm.com>
Signed-off-by: Joey Gouly <joey.gouly(a)arm.com>
Reviewed-by: Gavin Shan <gshan(a)redhat.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi(a)huawei.com>
Reviewed-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20241030160317.2528209-7-joey.gouly@arm.com
Signed-off-by: Oliver Upton <oliver.upton(a)linux.dev>
[ joey: fixed up merge conflict, no ID_FILTERED macro in 6.6 ]
Signed-off-by: Joey Gouly <joey.gouly(a)arm.com>
Cc: stable(a)vger.kernel.org # 6.6.x
Cc: Vitaly Chikunov <vt(a)altlinux.org>
Link: https://lore.kernel.org/linux-arm-kernel/20241202045830.e4yy3nkvxtzaybxk@al…
---
This fixes an issue seen when using KVM with a 6.6 host kernel, and
newer (6.13+) kernels in the guest.
Tested with a stripped down version of set_id_regs from the original
patch series.
arch/arm64/kvm/sys_regs.c | 52 +++++++++++++++++++++++++++++++++++++--
1 file changed, 50 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 370a1a7bd369..2031703424ea 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1330,6 +1330,7 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MPAM_frac);
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1472,6 +1473,13 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
val &= ~ID_AA64PFR0_EL1_AMU_MASK;
+ /*
+ * MPAM is disabled by default as KVM also needs a set of PARTID to
+ * program the MPAMVPMx_EL2 PARTID remapping registers with. But some
+ * older kernels let the guest see the ID bit.
+ */
+ val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+
return val;
}
@@ -1560,6 +1568,42 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
return set_id_reg(vcpu, rd, val);
}
+static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, u64 user_val)
+{
+ u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ u64 mpam_mask = ID_AA64PFR0_EL1_MPAM_MASK;
+
+ /*
+ * Commit 011e5f5bf529f ("arm64/cpufeature: Add remaining feature bits
+ * in ID_AA64PFR0 register") exposed the MPAM field of AA64PFR0_EL1 to
+ * guests, but didn't add trap handling. KVM doesn't support MPAM and
+ * always returns an UNDEF for these registers. The guest must see 0
+ * for this field.
+ *
+ * But KVM must also accept values from user-space that were provided
+ * by KVM. On CPUs that support MPAM, permit user-space to write
+ * the sanitizied value to ID_AA64PFR0_EL1.MPAM, but ignore this field.
+ */
+ if ((hw_val & mpam_mask) == (user_val & mpam_mask))
+ user_val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
+
+ return set_id_reg(vcpu, rd, user_val);
+}
+
+static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd, u64 user_val)
+{
+ u64 hw_val = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ u64 mpam_mask = ID_AA64PFR1_EL1_MPAM_frac_MASK;
+
+ /* See set_id_aa64pfr0_el1 for comment about MPAM */
+ if ((hw_val & mpam_mask) == (user_val & mpam_mask))
+ user_val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
+
+ return set_id_reg(vcpu, rd, user_val);
+}
+
/*
* cpufeature ID register user accessors
*
@@ -2018,10 +2062,14 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ID_AA64PFR0_EL1),
.access = access_id_reg,
.get_user = get_id_reg,
- .set_user = set_id_reg,
+ .set_user = set_id_aa64pfr0_el1,
.reset = read_sanitised_id_aa64pfr0_el1,
.val = ID_AA64PFR0_EL1_CSV2_MASK | ID_AA64PFR0_EL1_CSV3_MASK, },
- ID_SANITISED(ID_AA64PFR1_EL1),
+ { SYS_DESC(SYS_ID_AA64PFR1_EL1),
+ .access = access_id_reg,
+ .get_user = get_id_reg,
+ .set_user = set_id_aa64pfr1_el1,
+ .reset = kvm_read_sanitised_id_reg, },
ID_UNALLOCATED(4,2),
ID_UNALLOCATED(4,3),
ID_SANITISED(ID_AA64ZFR0_EL1),
--
2.25.1