The patch titled
Subject: kmap_local: don't assume kmap PTEs are linear arrays in memory
has been removed from the -mm tree. Its filename was
kmap_local-dont-assume-kmap-ptes-are-linear-arrays-in-memory.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Ard Biesheuvel <ardb(a)kernel.org>
Subject: kmap_local: don't assume kmap PTEs are linear arrays in memory
The kmap_local conversion broke the ARM architecture, because the new code
assumes that all PTEs used for creating kmaps form a linear array in
memory, and uses array indexing to look up the kmap PTE belonging to a
certain kmap index.
On ARM, this cannot work, not only because the PTE pages may be
non-adjacent in memory, but also because ARM/!LPAE interleaves hardware
entries and extended entries (carrying software-only bits) in a way that
is not compatible with array indexing.
Fortunately, this only seems to affect configurations with more than 8
CPUs, due to the way the per-CPU kmap slots are organized in memory.
Work around this by permitting an architecture to set a Kconfig symbol
that signifies that the kmap PTEs do not form a lineary array in memory,
and so the only way to locate the appropriate one is to walk the page
tables.
Link: https://lore.kernel.org/linux-arm-kernel/20211026131249.3731275-1-ardb@kern…
Link: https://lkml.kernel.org/r/20211116094737.7391-1-ardb@kernel.org
Fixes: 2a15ba82fa6c ("ARM: highmem: Switch to generic kmap atomic")
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
Reported-by: Quanyang Wang <quanyang.wang(a)windriver.com>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Acked-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/arm/Kconfig | 1 +
mm/Kconfig | 3 +++
mm/highmem.c | 32 +++++++++++++++++++++-----------
3 files changed, 25 insertions(+), 11 deletions(-)
--- a/arch/arm/Kconfig~kmap_local-dont-assume-kmap-ptes-are-linear-arrays-in-memory
+++ a/arch/arm/Kconfig
@@ -1463,6 +1463,7 @@ config HIGHMEM
bool "High Memory Support"
depends on MMU
select KMAP_LOCAL
+ select KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
help
The address space of ARM processors is only 4 Gigabytes large
and it has to accommodate user address space, kernel address
--- a/mm/highmem.c~kmap_local-dont-assume-kmap-ptes-are-linear-arrays-in-memory
+++ a/mm/highmem.c
@@ -503,16 +503,22 @@ static inline int kmap_local_calc_idx(in
static pte_t *__kmap_pte;
-static pte_t *kmap_get_pte(void)
+static pte_t *kmap_get_pte(unsigned long vaddr, int idx)
{
+ if (IS_ENABLED(CONFIG_KMAP_LOCAL_NON_LINEAR_PTE_ARRAY))
+ /*
+ * Set by the arch if __kmap_pte[-idx] does not produce
+ * the correct entry.
+ */
+ return virt_to_kpte(vaddr);
if (!__kmap_pte)
__kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
- return __kmap_pte;
+ return &__kmap_pte[-idx];
}
void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
{
- pte_t pteval, *kmap_pte = kmap_get_pte();
+ pte_t pteval, *kmap_pte;
unsigned long vaddr;
int idx;
@@ -524,9 +530,10 @@ void *__kmap_local_pfn_prot(unsigned lon
preempt_disable();
idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn);
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- BUG_ON(!pte_none(*(kmap_pte - idx)));
+ kmap_pte = kmap_get_pte(vaddr, idx);
+ BUG_ON(!pte_none(*kmap_pte));
pteval = pfn_pte(pfn, prot);
- arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte - idx, pteval);
+ arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte, pteval);
arch_kmap_local_post_map(vaddr, pteval);
current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
preempt_enable();
@@ -559,7 +566,7 @@ EXPORT_SYMBOL(__kmap_local_page_prot);
void kunmap_local_indexed(void *vaddr)
{
unsigned long addr = (unsigned long) vaddr & PAGE_MASK;
- pte_t *kmap_pte = kmap_get_pte();
+ pte_t *kmap_pte;
int idx;
if (addr < __fix_to_virt(FIX_KMAP_END) ||
@@ -584,8 +591,9 @@ void kunmap_local_indexed(void *vaddr)
idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr);
WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ kmap_pte = kmap_get_pte(addr, idx);
arch_kmap_local_pre_unmap(addr);
- pte_clear(&init_mm, addr, kmap_pte - idx);
+ pte_clear(&init_mm, addr, kmap_pte);
arch_kmap_local_post_unmap(addr);
current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
kmap_local_idx_pop();
@@ -607,7 +615,7 @@ EXPORT_SYMBOL(kunmap_local_indexed);
void __kmap_local_sched_out(void)
{
struct task_struct *tsk = current;
- pte_t *kmap_pte = kmap_get_pte();
+ pte_t *kmap_pte;
int i;
/* Clear kmaps */
@@ -634,8 +642,9 @@ void __kmap_local_sched_out(void)
idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ kmap_pte = kmap_get_pte(addr, idx);
arch_kmap_local_pre_unmap(addr);
- pte_clear(&init_mm, addr, kmap_pte - idx);
+ pte_clear(&init_mm, addr, kmap_pte);
arch_kmap_local_post_unmap(addr);
}
}
@@ -643,7 +652,7 @@ void __kmap_local_sched_out(void)
void __kmap_local_sched_in(void)
{
struct task_struct *tsk = current;
- pte_t *kmap_pte = kmap_get_pte();
+ pte_t *kmap_pte;
int i;
/* Restore kmaps */
@@ -663,7 +672,8 @@ void __kmap_local_sched_in(void)
/* See comment in __kmap_local_sched_out() */
idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
+ kmap_pte = kmap_get_pte(addr, idx);
+ set_pte_at(&init_mm, addr, kmap_pte, pteval);
arch_kmap_local_post_map(addr, pteval);
}
}
--- a/mm/Kconfig~kmap_local-dont-assume-kmap-ptes-are-linear-arrays-in-memory
+++ a/mm/Kconfig
@@ -890,6 +890,9 @@ config MAPPING_DIRTY_HELPERS
config KMAP_LOCAL
bool
+config KMAP_LOCAL_NON_LINEAR_PTE_ARRAY
+ bool
+
# struct io_mapping based helper. Selected by drivers that need them
config IO_MAPPING
bool
_
Patches currently in -mm which might be from ardb(a)kernel.org are
This is a partial revert of commit
29bc22ac5e5b ("binder: use euid from cred instead of using task").
Setting sender_euid using proc->cred caused some Android system test
regressions that need further investigation. It is a partial
reversion because subsequent patches rely on proc->cred.
Cc: stable(a)vger.kernel.org # 4.4+
Fixes: 29bc22ac5e5b ("binder: use euid from cred instead of using task")
Signed-off-by: Todd Kjos <tkjos(a)google.com>
Change-Id: I9b1769a3510fed250bb21859ef8beebabe034c66
---
- the issue was introduced in 5.16-rc1, so please apply to 5.16
- this should apply cleanly to all stable branches back to 4.4
that contain "binder: use euid from cred instead of using task"
drivers/android/binder.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 49fb74196d02..cffbe57a8e08 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2710,7 +2710,7 @@ static void binder_transaction(struct binder_proc *proc,
t->from = thread;
else
t->from = NULL;
- t->sender_euid = proc->cred->euid;
+ t->sender_euid = task_euid(proc->tsk);
t->to_proc = target_proc;
t->to_thread = target_thread;
t->code = tr->code;
--
2.34.0.rc1.387.gb447b232ab-goog
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 96869f193cfd26c2b47db32e4d8bcad50461df7a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon(a)kernel.org>
Date: Tue, 12 Oct 2021 16:15:25 +0300
Subject: [PATCH] net/mlx5: Set devlink reload feature bit for supported
devices only
Mulitport slave device doesn't support devlink reload, so instead of
complicating initialization flow with devlink_reload_enable() which
will be removed in next patch, don't set DEVLINK_F_RELOAD feature bit
for such devices.
This fixes an error when reload counters exposed (and equal zero) for
the mode that is not supported at all.
Fixes: d89ddaae1766 ("net/mlx5: Disable devlink reload for multi port slave device")
Signed-off-by: Leon Romanovsky <leonro(a)nvidia.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index fa98b7b95990..a85341a41cd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -796,6 +796,7 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink)
int mlx5_devlink_register(struct devlink *devlink)
{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
int err;
err = devlink_params_register(devlink, mlx5_devlink_params,
@@ -813,7 +814,9 @@ int mlx5_devlink_register(struct devlink *devlink)
if (err)
goto traps_reg_err;
- devlink_set_features(devlink, DEVLINK_F_RELOAD);
+ if (!mlx5_core_is_mp_slave(dev))
+ devlink_set_features(devlink, DEVLINK_F_RELOAD);
+
return 0;
traps_reg_err:
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 96869f193cfd26c2b47db32e4d8bcad50461df7a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon(a)kernel.org>
Date: Tue, 12 Oct 2021 16:15:25 +0300
Subject: [PATCH] net/mlx5: Set devlink reload feature bit for supported
devices only
Mulitport slave device doesn't support devlink reload, so instead of
complicating initialization flow with devlink_reload_enable() which
will be removed in next patch, don't set DEVLINK_F_RELOAD feature bit
for such devices.
This fixes an error when reload counters exposed (and equal zero) for
the mode that is not supported at all.
Fixes: d89ddaae1766 ("net/mlx5: Disable devlink reload for multi port slave device")
Signed-off-by: Leon Romanovsky <leonro(a)nvidia.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index fa98b7b95990..a85341a41cd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -796,6 +796,7 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink)
int mlx5_devlink_register(struct devlink *devlink)
{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
int err;
err = devlink_params_register(devlink, mlx5_devlink_params,
@@ -813,7 +814,9 @@ int mlx5_devlink_register(struct devlink *devlink)
if (err)
goto traps_reg_err;
- devlink_set_features(devlink, DEVLINK_F_RELOAD);
+ if (!mlx5_core_is_mp_slave(dev))
+ devlink_set_features(devlink, DEVLINK_F_RELOAD);
+
return 0;
traps_reg_err:
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From e6ba5273d4ede03d075d7a116b8edad1f6115f4d Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley(a)intel.com>
Date: Thu, 9 Sep 2021 14:38:09 -0700
Subject: [PATCH] ice: Fix race conditions between virtchnl handling and VF ndo
ops
The VF can be configured via the PF's ndo ops at the same time the PF is
receiving/handling virtchnl messages. This has many issues, with
one of them being the ndo op could be actively resetting a VF (i.e.
resetting it to the default state and deleting/re-adding the VF's VSI)
while a virtchnl message is being handled. The following error was seen
because a VF ndo op was used to change a VF's trust setting while the
VIRTCHNL_OP_CONFIG_VSI_QUEUES was ongoing:
[35274.192484] ice 0000:88:00.0: Failed to set LAN Tx queue context, error: ICE_ERR_PARAM
[35274.193074] ice 0000:88:00.0: VF 0 failed opcode 6, retval: -5
[35274.193640] iavf 0000:88:01.0: PF returned error -5 (IAVF_ERR_PARAM) to our request 6
Fix this by making sure the virtchnl handling and VF ndo ops that
trigger VF resets cannot run concurrently. This is done by adding a
struct mutex cfg_lock to each VF structure. For VF ndo ops, the mutex
will be locked around the critical operations and VFR. Since the ndo ops
will trigger a VFR, the virtchnl thread will use mutex_trylock(). This
is done because if any other thread (i.e. VF ndo op) has the mutex, then
that means the current VF message being handled is no longer valid, so
just ignore it.
This issue can be seen using the following commands:
for i in {0..50}; do
rmmod ice
modprobe ice
sleep 1
echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs
echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs
ip link set ens785f1 vf 0 trust on
ip link set ens785f0 vf 0 trust on
sleep 2
echo 0 > /sys/class/net/ens785f0/device/sriov_numvfs
echo 0 > /sys/class/net/ens785f1/device/sriov_numvfs
sleep 1
echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs
echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs
ip link set ens785f1 vf 0 trust on
ip link set ens785f0 vf 0 trust on
done
Fixes: 7c710869d64e ("ice: Add handlers for VF netdevice operations")
Signed-off-by: Brett Creeley <brett.creeley(a)intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 3f727df3b6fb..217ff5e9a6f1 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -650,6 +650,8 @@ void ice_free_vfs(struct ice_pf *pf)
set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
ice_free_vf_res(&pf->vf[i]);
}
+
+ mutex_destroy(&pf->vf[i].cfg_lock);
}
if (ice_sriov_free_msix_res(pf))
@@ -1946,6 +1948,8 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
ice_vf_fdir_init(vf);
ice_vc_set_dflt_vf_ops(&vf->vc_ops);
+
+ mutex_init(&vf->cfg_lock);
}
}
@@ -4135,6 +4139,8 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
return 0;
}
+ mutex_lock(&vf->cfg_lock);
+
vf->port_vlan_info = vlanprio;
if (vf->port_vlan_info)
@@ -4144,6 +4150,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);
ice_vc_reset_vf(vf);
+ mutex_unlock(&vf->cfg_lock);
return 0;
}
@@ -4683,6 +4690,15 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
return;
}
+ /* VF is being configured in another context that triggers a VFR, so no
+ * need to process this message
+ */
+ if (!mutex_trylock(&vf->cfg_lock)) {
+ dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n",
+ vf->vf_id);
+ return;
+ }
+
switch (v_opcode) {
case VIRTCHNL_OP_VERSION:
err = ops->get_ver_msg(vf, msg);
@@ -4771,6 +4787,8 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
vf_id, v_opcode, err);
}
+
+ mutex_unlock(&vf->cfg_lock);
}
/**
@@ -4886,6 +4904,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
return -EINVAL;
}
+ mutex_lock(&vf->cfg_lock);
+
/* VF is notified of its new MAC via the PF's response to the
* VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
*/
@@ -4904,6 +4924,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
}
ice_vc_reset_vf(vf);
+ mutex_unlock(&vf->cfg_lock);
return 0;
}
@@ -4938,11 +4959,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
if (trusted == vf->trusted)
return 0;
+ mutex_lock(&vf->cfg_lock);
+
vf->trusted = trusted;
ice_vc_reset_vf(vf);
dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
vf_id, trusted ? "" : "un");
+ mutex_unlock(&vf->cfg_lock);
+
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 5ff93a08f54c..7e28ecbbe7af 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -100,6 +100,11 @@ struct ice_vc_vf_ops {
struct ice_vf {
struct ice_pf *pf;
+ /* Used during virtchnl message handling and NDO ops against the VF
+ * that will trigger a VFR
+ */
+ struct mutex cfg_lock;
+
u16 vf_id; /* VF ID in the PF space */
u16 lan_vsi_idx; /* index into PF struct */
u16 ctrl_vsi_idx;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From e6ba5273d4ede03d075d7a116b8edad1f6115f4d Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley(a)intel.com>
Date: Thu, 9 Sep 2021 14:38:09 -0700
Subject: [PATCH] ice: Fix race conditions between virtchnl handling and VF ndo
ops
The VF can be configured via the PF's ndo ops at the same time the PF is
receiving/handling virtchnl messages. This has many issues, with
one of them being the ndo op could be actively resetting a VF (i.e.
resetting it to the default state and deleting/re-adding the VF's VSI)
while a virtchnl message is being handled. The following error was seen
because a VF ndo op was used to change a VF's trust setting while the
VIRTCHNL_OP_CONFIG_VSI_QUEUES was ongoing:
[35274.192484] ice 0000:88:00.0: Failed to set LAN Tx queue context, error: ICE_ERR_PARAM
[35274.193074] ice 0000:88:00.0: VF 0 failed opcode 6, retval: -5
[35274.193640] iavf 0000:88:01.0: PF returned error -5 (IAVF_ERR_PARAM) to our request 6
Fix this by making sure the virtchnl handling and VF ndo ops that
trigger VF resets cannot run concurrently. This is done by adding a
struct mutex cfg_lock to each VF structure. For VF ndo ops, the mutex
will be locked around the critical operations and VFR. Since the ndo ops
will trigger a VFR, the virtchnl thread will use mutex_trylock(). This
is done because if any other thread (i.e. VF ndo op) has the mutex, then
that means the current VF message being handled is no longer valid, so
just ignore it.
This issue can be seen using the following commands:
for i in {0..50}; do
rmmod ice
modprobe ice
sleep 1
echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs
echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs
ip link set ens785f1 vf 0 trust on
ip link set ens785f0 vf 0 trust on
sleep 2
echo 0 > /sys/class/net/ens785f0/device/sriov_numvfs
echo 0 > /sys/class/net/ens785f1/device/sriov_numvfs
sleep 1
echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs
echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs
ip link set ens785f1 vf 0 trust on
ip link set ens785f0 vf 0 trust on
done
Fixes: 7c710869d64e ("ice: Add handlers for VF netdevice operations")
Signed-off-by: Brett Creeley <brett.creeley(a)intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 3f727df3b6fb..217ff5e9a6f1 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -650,6 +650,8 @@ void ice_free_vfs(struct ice_pf *pf)
set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
ice_free_vf_res(&pf->vf[i]);
}
+
+ mutex_destroy(&pf->vf[i].cfg_lock);
}
if (ice_sriov_free_msix_res(pf))
@@ -1946,6 +1948,8 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
ice_vf_fdir_init(vf);
ice_vc_set_dflt_vf_ops(&vf->vc_ops);
+
+ mutex_init(&vf->cfg_lock);
}
}
@@ -4135,6 +4139,8 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
return 0;
}
+ mutex_lock(&vf->cfg_lock);
+
vf->port_vlan_info = vlanprio;
if (vf->port_vlan_info)
@@ -4144,6 +4150,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);
ice_vc_reset_vf(vf);
+ mutex_unlock(&vf->cfg_lock);
return 0;
}
@@ -4683,6 +4690,15 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
return;
}
+ /* VF is being configured in another context that triggers a VFR, so no
+ * need to process this message
+ */
+ if (!mutex_trylock(&vf->cfg_lock)) {
+ dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n",
+ vf->vf_id);
+ return;
+ }
+
switch (v_opcode) {
case VIRTCHNL_OP_VERSION:
err = ops->get_ver_msg(vf, msg);
@@ -4771,6 +4787,8 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
vf_id, v_opcode, err);
}
+
+ mutex_unlock(&vf->cfg_lock);
}
/**
@@ -4886,6 +4904,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
return -EINVAL;
}
+ mutex_lock(&vf->cfg_lock);
+
/* VF is notified of its new MAC via the PF's response to the
* VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
*/
@@ -4904,6 +4924,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
}
ice_vc_reset_vf(vf);
+ mutex_unlock(&vf->cfg_lock);
return 0;
}
@@ -4938,11 +4959,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
if (trusted == vf->trusted)
return 0;
+ mutex_lock(&vf->cfg_lock);
+
vf->trusted = trusted;
ice_vc_reset_vf(vf);
dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
vf_id, trusted ? "" : "un");
+ mutex_unlock(&vf->cfg_lock);
+
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 5ff93a08f54c..7e28ecbbe7af 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -100,6 +100,11 @@ struct ice_vc_vf_ops {
struct ice_vf {
struct ice_pf *pf;
+ /* Used during virtchnl message handling and NDO ops against the VF
+ * that will trigger a VFR
+ */
+ struct mutex cfg_lock;
+
u16 vf_id; /* VF ID in the PF space */
u16 lan_vsi_idx; /* index into PF struct */
u16 ctrl_vsi_idx;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From e6ba5273d4ede03d075d7a116b8edad1f6115f4d Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley(a)intel.com>
Date: Thu, 9 Sep 2021 14:38:09 -0700
Subject: [PATCH] ice: Fix race conditions between virtchnl handling and VF ndo
ops
The VF can be configured via the PF's ndo ops at the same time the PF is
receiving/handling virtchnl messages. This has many issues, with
one of them being the ndo op could be actively resetting a VF (i.e.
resetting it to the default state and deleting/re-adding the VF's VSI)
while a virtchnl message is being handled. The following error was seen
because a VF ndo op was used to change a VF's trust setting while the
VIRTCHNL_OP_CONFIG_VSI_QUEUES was ongoing:
[35274.192484] ice 0000:88:00.0: Failed to set LAN Tx queue context, error: ICE_ERR_PARAM
[35274.193074] ice 0000:88:00.0: VF 0 failed opcode 6, retval: -5
[35274.193640] iavf 0000:88:01.0: PF returned error -5 (IAVF_ERR_PARAM) to our request 6
Fix this by making sure the virtchnl handling and VF ndo ops that
trigger VF resets cannot run concurrently. This is done by adding a
struct mutex cfg_lock to each VF structure. For VF ndo ops, the mutex
will be locked around the critical operations and VFR. Since the ndo ops
will trigger a VFR, the virtchnl thread will use mutex_trylock(). This
is done because if any other thread (i.e. VF ndo op) has the mutex, then
that means the current VF message being handled is no longer valid, so
just ignore it.
This issue can be seen using the following commands:
for i in {0..50}; do
rmmod ice
modprobe ice
sleep 1
echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs
echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs
ip link set ens785f1 vf 0 trust on
ip link set ens785f0 vf 0 trust on
sleep 2
echo 0 > /sys/class/net/ens785f0/device/sriov_numvfs
echo 0 > /sys/class/net/ens785f1/device/sriov_numvfs
sleep 1
echo 1 > /sys/class/net/ens785f0/device/sriov_numvfs
echo 1 > /sys/class/net/ens785f1/device/sriov_numvfs
ip link set ens785f1 vf 0 trust on
ip link set ens785f0 vf 0 trust on
done
Fixes: 7c710869d64e ("ice: Add handlers for VF netdevice operations")
Signed-off-by: Brett Creeley <brett.creeley(a)intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski(a)intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen(a)intel.com>
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 3f727df3b6fb..217ff5e9a6f1 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -650,6 +650,8 @@ void ice_free_vfs(struct ice_pf *pf)
set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
ice_free_vf_res(&pf->vf[i]);
}
+
+ mutex_destroy(&pf->vf[i].cfg_lock);
}
if (ice_sriov_free_msix_res(pf))
@@ -1946,6 +1948,8 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
ice_vf_fdir_init(vf);
ice_vc_set_dflt_vf_ops(&vf->vc_ops);
+
+ mutex_init(&vf->cfg_lock);
}
}
@@ -4135,6 +4139,8 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
return 0;
}
+ mutex_lock(&vf->cfg_lock);
+
vf->port_vlan_info = vlanprio;
if (vf->port_vlan_info)
@@ -4144,6 +4150,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);
ice_vc_reset_vf(vf);
+ mutex_unlock(&vf->cfg_lock);
return 0;
}
@@ -4683,6 +4690,15 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
return;
}
+ /* VF is being configured in another context that triggers a VFR, so no
+ * need to process this message
+ */
+ if (!mutex_trylock(&vf->cfg_lock)) {
+ dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n",
+ vf->vf_id);
+ return;
+ }
+
switch (v_opcode) {
case VIRTCHNL_OP_VERSION:
err = ops->get_ver_msg(vf, msg);
@@ -4771,6 +4787,8 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event)
dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
vf_id, v_opcode, err);
}
+
+ mutex_unlock(&vf->cfg_lock);
}
/**
@@ -4886,6 +4904,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
return -EINVAL;
}
+ mutex_lock(&vf->cfg_lock);
+
/* VF is notified of its new MAC via the PF's response to the
* VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
*/
@@ -4904,6 +4924,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
}
ice_vc_reset_vf(vf);
+ mutex_unlock(&vf->cfg_lock);
return 0;
}
@@ -4938,11 +4959,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
if (trusted == vf->trusted)
return 0;
+ mutex_lock(&vf->cfg_lock);
+
vf->trusted = trusted;
ice_vc_reset_vf(vf);
dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
vf_id, trusted ? "" : "un");
+ mutex_unlock(&vf->cfg_lock);
+
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 5ff93a08f54c..7e28ecbbe7af 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -100,6 +100,11 @@ struct ice_vc_vf_ops {
struct ice_vf {
struct ice_pf *pf;
+ /* Used during virtchnl message handling and NDO ops against the VF
+ * that will trigger a VFR
+ */
+ struct mutex cfg_lock;
+
u16 vf_id; /* VF ID in the PF space */
u16 lan_vsi_idx; /* index into PF struct */
u16 ctrl_vsi_idx;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 2ff04286a9569675948f39cec2c6ad47c3584633 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leon(a)kernel.org>
Date: Thu, 23 Sep 2021 21:12:52 +0300
Subject: [PATCH] ice: Delete always true check of PF pointer
PF pointer is always valid when PCI core calls its .shutdown() and
.remove() callbacks. There is no need to check it again.
Fixes: 837f08fdecbe ("ice: Add basic driver framework for Intel(R) E800 Series")
Signed-off-by: Leon Romanovsky <leonro(a)nvidia.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 34e64533026a..aacc0b345bbe 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4593,9 +4593,6 @@ static void ice_remove(struct pci_dev *pdev)
struct ice_pf *pf = pci_get_drvdata(pdev);
int i;
- if (!pf)
- return;
-
for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
if (!ice_is_reset_in_progress(pf->state))
break;