From: Tony Krowiak <akrowiak(a)linux.ibm.com>
When filtering the adapters from the configuration profile for a guest to
create or update a guest's AP configuration, if the APID of an adapter and
the APQI of a domain identify a queue device that is not bound to the
vfio_ap device driver, the APID of the adapter will be filtered because an
individual APQN can not be filtered due to the fact the APQNs are assigned
to an AP configuration as a matrix of APIDs and APQIs. Consequently, a
guest will not have access to all of the queues associated with the
filtered adapter. If the queues are subsequently made available again to
the guest, they should re-appear in a reset state; so, let's make sure all
queues associated with an adapter unplugged from the guest are reset.
In order to identify the set of queues that need to be reset, let's allow a
vfio_ap_queue object to be simultaneously stored in both a hashtable and a
list: A hashtable used to store all of the queues assigned
to a matrix mdev; and/or, a list used to store a subset of the queues that
need to be reset. For example, when an adapter is hot unplugged from a
guest, all guest queues associated with that adapter must be reset. Since
that may be a subset of those assigned to the matrix mdev, they can be
stored in a list that can be passed to the vfio_ap_mdev_reset_queues
function.
Signed-off-by: Tony Krowiak <akrowiak(a)linux.ibm.com>
Acked-by: Halil Pasic <pasic(a)linux.ibm.com>
Fixes: 48cae940c31d ("s390/vfio-ap: refresh guest's APCB by filtering AP resources assigned to mdev")
Cc: <stable(a)vger.kernel.org>
---
drivers/s390/crypto/vfio_ap_ops.c | 171 +++++++++++++++++++-------
drivers/s390/crypto/vfio_ap_private.h | 11 +-
2 files changed, 133 insertions(+), 49 deletions(-)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index e45d0bf7b126..44cd29aace8e 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -32,7 +32,8 @@
#define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */
-static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
+static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
+static int vfio_ap_mdev_reset_qlist(struct list_head *qlist);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
@@ -665,16 +666,23 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev)
* device driver.
*
* @matrix_mdev: the matrix mdev whose matrix is to be filtered.
+ * @apm_filtered: a 256-bit bitmap for storing the APIDs filtered from the
+ * guest's AP configuration that are still in the host's AP
+ * configuration.
*
* Note: If an APQN referencing a queue device that is not bound to the vfio_ap
* driver, its APID will be filtered from the guest's APCB. The matrix
* structure precludes filtering an individual APQN, so its APID will be
- * filtered.
+ * filtered. Consequently, all queues associated with the adapter that
+ * are in the host's AP configuration must be reset. If queues are
+ * subsequently made available again to the guest, they should re-appear
+ * in a reset state
*
* Return: a boolean value indicating whether the KVM guest's APCB was changed
* by the filtering or not.
*/
-static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
+static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long *apm_filtered)
{
unsigned long apid, apqi, apqn;
DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES);
@@ -684,6 +692,7 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES);
bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS);
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
+ bitmap_clear(apm_filtered, 0, AP_DEVICES);
/*
* Copy the adapters, domains and control domains to the shadow_apcb
@@ -709,8 +718,16 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
apqn = AP_MKQID(apid, apqi);
q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
if (!q || q->reset_status.response_code) {
- clear_bit_inv(apid,
- matrix_mdev->shadow_apcb.apm);
+ clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
+
+ /*
+ * If the adapter was previously plugged into
+ * the guest, let's let the caller know that
+ * the APID was filtered.
+ */
+ if (test_bit_inv(apid, prev_shadow_apm))
+ set_bit_inv(apid, apm_filtered);
+
break;
}
}
@@ -812,7 +829,7 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev)
mutex_lock(&matrix_dev->guests_lock);
mutex_lock(&matrix_dev->mdevs_lock);
- vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
+ vfio_ap_mdev_reset_queues(matrix_mdev);
vfio_ap_mdev_unlink_fr_queues(matrix_mdev);
list_del(&matrix_mdev->node);
mutex_unlock(&matrix_dev->mdevs_lock);
@@ -922,6 +939,47 @@ static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev,
AP_MKQID(apid, apqi));
}
+static int reset_queues_for_apids(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long *apm_reset)
+{
+ struct vfio_ap_queue *q, *tmpq;
+ struct list_head qlist;
+ unsigned long apid, apqi;
+ int apqn, ret = 0;
+
+ if (bitmap_empty(apm_reset, AP_DEVICES))
+ return 0;
+
+ INIT_LIST_HEAD(&qlist);
+
+ for_each_set_bit_inv(apid, apm_reset, AP_DEVICES) {
+ for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm,
+ AP_DOMAINS) {
+ /*
+ * If the domain is not in the host's AP configuration,
+ * then resetting it will fail with response code 01
+ * (APQN not valid).
+ */
+ if (!test_bit_inv(apqi,
+ (unsigned long *)matrix_dev->info.aqm))
+ continue;
+
+ apqn = AP_MKQID(apid, apqi);
+ q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
+
+ if (q)
+ list_add_tail(&q->reset_qnode, &qlist);
+ }
+ }
+
+ ret = vfio_ap_mdev_reset_qlist(&qlist);
+
+ list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode)
+ list_del(&q->reset_qnode);
+
+ return ret;
+}
+
/**
* assign_adapter_store - parses the APID from @buf and sets the
* corresponding bit in the mediated matrix device's APM
@@ -962,6 +1020,7 @@ static ssize_t assign_adapter_store(struct device *dev,
{
int ret;
unsigned long apid;
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
mutex_lock(&ap_perms_mutex);
@@ -991,8 +1050,10 @@ static ssize_t assign_adapter_store(struct device *dev,
vfio_ap_mdev_link_adapter(matrix_mdev, apid);
- if (vfio_ap_mdev_filter_matrix(matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+ }
ret = count;
done:
@@ -1023,11 +1084,12 @@ static struct vfio_ap_queue
* adapter was assigned.
* @matrix_mdev: the matrix mediated device to which the adapter was assigned.
* @apid: the APID of the unassigned adapter.
- * @qtable: table for storing queues associated with unassigned adapter.
+ * @qlist: list for storing queues associated with unassigned adapter that
+ * need to be reset.
*/
static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid,
- struct ap_queue_table *qtable)
+ struct list_head *qlist)
{
unsigned long apqi;
struct vfio_ap_queue *q;
@@ -1035,11 +1097,10 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) {
q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi);
- if (q && qtable) {
+ if (q && qlist) {
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
- hash_add(qtable->queues, &q->mdev_qnode,
- q->apqn);
+ list_add_tail(&q->reset_qnode, qlist);
}
}
}
@@ -1047,26 +1108,23 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid)
{
- int loop_cursor;
- struct vfio_ap_queue *q;
- struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL);
+ struct vfio_ap_queue *q, *tmpq;
+ struct list_head qlist;
- hash_init(qtable->queues);
- vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, qtable);
+ INIT_LIST_HEAD(&qlist);
+ vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, &qlist);
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) {
clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
}
- vfio_ap_mdev_reset_queues(qtable);
+ vfio_ap_mdev_reset_qlist(&qlist);
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) {
vfio_ap_unlink_mdev_fr_queue(q);
- hash_del(&q->mdev_qnode);
+ list_del(&q->reset_qnode);
}
-
- kfree(qtable);
}
/**
@@ -1167,6 +1225,7 @@ static ssize_t assign_domain_store(struct device *dev,
{
int ret;
unsigned long apqi;
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
mutex_lock(&ap_perms_mutex);
@@ -1196,8 +1255,10 @@ static ssize_t assign_domain_store(struct device *dev,
vfio_ap_mdev_link_domain(matrix_mdev, apqi);
- if (vfio_ap_mdev_filter_matrix(matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+ }
ret = count;
done:
@@ -1210,7 +1271,7 @@ static DEVICE_ATTR_WO(assign_domain);
static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
unsigned long apqi,
- struct ap_queue_table *qtable)
+ struct list_head *qlist)
{
unsigned long apid;
struct vfio_ap_queue *q;
@@ -1218,11 +1279,10 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) {
q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi);
- if (q && qtable) {
+ if (q && qlist) {
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
- hash_add(qtable->queues, &q->mdev_qnode,
- q->apqn);
+ list_add_tail(&q->reset_qnode, qlist);
}
}
}
@@ -1230,26 +1290,23 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev,
unsigned long apqi)
{
- int loop_cursor;
- struct vfio_ap_queue *q;
- struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL);
+ struct vfio_ap_queue *q, *tmpq;
+ struct list_head qlist;
- hash_init(qtable->queues);
- vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, qtable);
+ INIT_LIST_HEAD(&qlist);
+ vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, &qlist);
if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
}
- vfio_ap_mdev_reset_queues(qtable);
+ vfio_ap_mdev_reset_qlist(&qlist);
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) {
vfio_ap_unlink_mdev_fr_queue(q);
- hash_del(&q->mdev_qnode);
+ list_del(&q->reset_qnode);
}
-
- kfree(qtable);
}
/**
@@ -1604,7 +1661,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
get_update_locks_for_kvm(kvm);
kvm_arch_crypto_clear_masks(kvm);
- vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
+ vfio_ap_mdev_reset_queues(matrix_mdev);
kvm_put_kvm(kvm);
matrix_mdev->kvm = NULL;
@@ -1740,15 +1797,33 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
}
}
-static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
+static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
{
int ret = 0, loop_cursor;
struct vfio_ap_queue *q;
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode)
+ hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode)
vfio_ap_mdev_reset_queue(q);
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode) {
+ flush_work(&q->reset_work);
+
+ if (q->reset_status.response_code)
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+static int vfio_ap_mdev_reset_qlist(struct list_head *qlist)
+{
+ int ret = 0;
+ struct vfio_ap_queue *q;
+
+ list_for_each_entry(q, qlist, reset_qnode)
+ vfio_ap_mdev_reset_queue(q);
+
+ list_for_each_entry(q, qlist, reset_qnode) {
flush_work(&q->reset_work);
if (q->reset_status.response_code)
@@ -1934,7 +2009,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
ret = vfio_ap_mdev_get_device_info(arg);
break;
case VFIO_DEVICE_RESET:
- ret = vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
+ ret = vfio_ap_mdev_reset_queues(matrix_mdev);
break;
case VFIO_DEVICE_GET_IRQ_INFO:
ret = vfio_ap_get_irq_info(arg);
@@ -2080,6 +2155,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
{
int ret;
struct vfio_ap_queue *q;
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev;
ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group);
@@ -2112,15 +2188,17 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
!bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS))
goto done;
- if (vfio_ap_mdev_filter_matrix(matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+ }
}
done:
dev_set_drvdata(&apdev->device, q);
release_update_locks_for_mdev(matrix_mdev);
- return 0;
+ return ret;
err_remove_group:
sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group);
@@ -2464,6 +2542,7 @@ void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info,
static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
{
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
bool filter_domains, filter_adapters, filter_cdoms, do_hotplug = false;
mutex_lock(&matrix_mdev->kvm->lock);
@@ -2477,7 +2556,7 @@ static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
matrix_mdev->adm_add, AP_DOMAINS);
if (filter_adapters || filter_domains)
- do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev);
+ do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered);
if (filter_cdoms)
do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev);
@@ -2485,6 +2564,8 @@ static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
if (do_hotplug)
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+
mutex_unlock(&matrix_dev->mdevs_lock);
mutex_unlock(&matrix_mdev->kvm->lock);
}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 88aff8b81f2f..20eac8b0f0b9 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -83,10 +83,10 @@ struct ap_matrix {
};
/**
- * struct ap_queue_table - a table of queue objects.
- *
- * @queues: a hashtable of queues (struct vfio_ap_queue).
- */
+ * struct ap_queue_table - a table of queue objects.
+ *
+ * @queues: a hashtable of queues (struct vfio_ap_queue).
+ */
struct ap_queue_table {
DECLARE_HASHTABLE(queues, 8);
};
@@ -133,6 +133,8 @@ struct ap_matrix_mdev {
* @apqn: the APQN of the AP queue device
* @saved_isc: the guest ISC registered with the GIB interface
* @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
+ * @reset_qnode: allows the vfio_ap_queue struct to be added to a list of queues
+ * that need to be reset
* @reset_status: the status from the last reset of the queue
* @reset_work: work to wait for queue reset to complete
*/
@@ -143,6 +145,7 @@ struct vfio_ap_queue {
#define VFIO_AP_ISC_INVALID 0xff
unsigned char saved_isc;
struct hlist_node mdev_qnode;
+ struct list_head reset_qnode;
struct ap_queue_status reset_status;
struct work_struct reset_work;
};
--
2.43.0
hi,
we need to be able to use latest pahole options for 6.1 kernels,
updating the scripts/pahole-flags.sh with that (clean backports).
thanks,
jirka
---
Alan Maguire (1):
bpf: Add --skip_encoding_btf_inconsistent_proto, --btf_gen_optimized to pahole flags for v1.25
Martin Rodriguez Reboredo (1):
btf, scripts: Exclude Rust CUs with pahole
init/Kconfig | 2 +-
lib/Kconfig.debug | 9 +++++++++
scripts/pahole-flags.sh | 7 +++++++
3 files changed, 17 insertions(+), 1 deletion(-)
[ Upstream commit a7939f01672034a58ad3fdbce69bb6c665ce0024 ]
Builtin/initrd microcode will not be used the ucode loader is disabled.
But currently, save_microcode_in_initrd is always performed and it
accesses MSR_IA32_UCODE_REV even if dis_ucode_ldr is true, and in
particular even if X86_FEATURE_HYPERVISOR is set; the TDX module does not
implement the MSR and the result is a call trace at boot for TDX guests.
Mainline Linux fixed this as part of a more complex rework of microcode
caching that went into 6.7 (see in particular commits dd5e3e3ca6,
"x86/microcode/intel: Simplify early loading"; and a7939f0167203,
"x86/microcode/amd: Cache builtin/initrd microcode early"). Do the bare
minimum in stable kernels, setting initrd_gone just like mainline Linux
does in mark_initrd_gone().
Note that save_microcode_in_initrd() is not in the microcode application
path, which runs with paging disabled on 32-bit systems, so it can (and
has to) use dis_ucode_ldr instead of check_loader_disabled_ap().
Cc: stable(a)vger.kernel.org # v6.6+
Cc: x86(a)kernel.org # v6.6+
Cc: Borislav Petkov <bp(a)suse.de>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kernel/cpu/microcode/core.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 35d39a13dc90..503b5da56685 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -214,6 +214,11 @@ static int __init save_microcode_in_initrd(void)
struct cpuinfo_x86 *c = &boot_cpu_data;
int ret = -EINVAL;
+ if (dis_ucode_ldr) {
+ ret = 0;
+ goto out;
+ }
+
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
if (c->x86 >= 6)
@@ -227,6 +230,7 @@ static int __init save_microcode_in_initrd(void)
break;
}
+out:
initrd_gone = true;
return ret;
--
2.43.0
Some DSA tagging protocols change the EtherType field in the MAC header
e.g. DSA_TAG_PROTO_(DSA/EDSA/BRCM/MTK/RTL4C_A/SJA1105). On TX these tagged
frames are ignored by the checksum offload engine and IP header checker of
some stmmac cores.
On RX, the stmmac driver wrongly assumes that checksums have been computed
for these tagged packets, and sets CHECKSUM_UNNECESSARY.
Add an additional check in the stmmac TX and RX hotpaths so that COE is
deactivated for packets with ethertypes that will not trigger the COE and
IP header checks.
Fixes: 6b2c6e4a938f ("net: stmmac: propagate feature flags to vlan")
Cc: <stable(a)vger.kernel.org>
Reported-by: Richard Tresidder <rtresidd(a)electromag.com.au>
Link: https://lore.kernel.org/netdev/e5c6c75f-2dfa-4e50-a1fb-6bf4cdb617c2@electro…
Reported-by: Romain Gantois <romain.gantois(a)bootlin.com>
Link: https://lore.kernel.org/netdev/c57283ed-6b9b-b0e6-ee12-5655c1c54495@bootlin…
Reviewed-by: Vladimir Oltean <vladimir.oltean(a)nxp.com>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Signed-off-by: Romain Gantois <romain.gantois(a)bootlin.com>
---
Hello everyone,
This is the sixth version of my proposed fix for the stmmac checksum
offloading issue that has recently been reported.
significant changes in v4:
- Removed "inline" from declaration of stmmac_has_ip_ethertype
significant changes in v3:
- Use __vlan_get_protocol to make sure that 8021Q-encapsulated
traffic is checked correctly.
significant changes in v2:
- Replaced the stmmac_link_up-based fix with an ethertype check in the TX
and RX hotpaths.
The Checksum Offloading Engine of some stmmac cores (e.g. DWMAC1000)
computes an incorrect checksum when presented with DSA-tagged packets. This
causes all TCP/UDP transfers to break when the stmmac device is connected
to the CPU port of a DSA switch.
I ran some tests using different tagging protocols with DSA_LOOP, and all
of the protocols that set a custom ethertype field in the MAC header caused
the checksum offload engine to ignore the tagged packets. On TX, this
caused packets to egress with incorrect checksums. On RX, these packets
were similarly ignored by the COE, yet the stmmac driver set
CHECKSUM_UNNECESSARY, wrongly assuming that their checksums had been
verified in hardware.
Version 2 of this patch series fixes this issue by checking ethertype
fields in both the TX and RX hotpaths of the stmmac driver. On TX, if a
non-IP ethertype is detected, the packet is checksummed in software. On
RX, the same condition causes stmmac to avoid setting CHECKSUM_UNNECESSARY.
To measure the performance degradation to the TX/RX hotpaths, I did some
iperf3 runs with 512-byte unfragmented UDP packets.
measured degradation on TX: -466 pps (-0.2%) on RX: -338 pps (-1.2%)
original performances on TX: 22kpps on RX: 27kpps
The performance hit on the RX path can be partly explained by the fact that
the stmmac driver doesn't set CHECKSUM_UNNECESSARY anymore.
The TX performance degradation observed in v2 seems to have improved.
It's not entirely clear to me why that is.
Best Regards,
Romain
Romain Gantois (1):
net: stmmac: Prevent DSA tags from breaking COE
.../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 ++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
--
2.43.0
---
Changes in v6:
- Style fixes
- Link to v5: https://lore.kernel.org/r/20240111-prevent_dsa_tags-v5-1-63e795a4d129@bootl…
Changes in v5:
- Added missing "net" tag to subject of patch series
- Link to v4: https://lore.kernel.org/r/20240109-prevent_dsa_tags-v4-1-f888771fa2f6@bootl…
---
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 32 ++++++++++++++++++++---
1 file changed, 29 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c78a96b8eb64..a0e46369ae15 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4435,6 +4435,28 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
+/**
+ * stmmac_has_ip_ethertype() - Check if packet has IP ethertype
+ * @skb: socket buffer to check
+ *
+ * Check if a packet has an ethertype that will trigger the IP header checks
+ * and IP/TCP checksum engine of the stmmac core.
+ *
+ * Return: true if the ethertype can trigger the checksum engine, false
+ * otherwise
+ */
+static bool stmmac_has_ip_ethertype(struct sk_buff *skb)
+{
+ int depth = 0;
+ __be16 proto;
+
+ proto = __vlan_get_protocol(skb, eth_header_parse_protocol(skb),
+ &depth);
+
+ return (depth <= ETH_HLEN) &&
+ (proto == htons(ETH_P_IP) || proto == htons(ETH_P_IPV6));
+}
+
/**
* stmmac_xmit - Tx entry point of the driver
* @skb : the socket buffer
@@ -4499,9 +4521,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
/* DWMAC IPs can be synthesized to support tx coe only for a few tx
* queues. In that case, checksum offloading for those queues that don't
* support tx coe needs to fallback to software checksum calculation.
+ *
+ * Packets that won't trigger the COE e.g. most DSA-tagged packets will
+ * also have to be checksummed in software.
*/
if (csum_insertion &&
- priv->plat->tx_queues_cfg[queue].coe_unsupported) {
+ (priv->plat->tx_queues_cfg[queue].coe_unsupported ||
+ !stmmac_has_ip_ethertype(skb))) {
if (unlikely(skb_checksum_help(skb)))
goto dma_map_err;
csum_insertion = !csum_insertion;
@@ -5066,7 +5092,7 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
stmmac_rx_vlan(priv->dev, skb);
skb->protocol = eth_type_trans(skb, priv->dev);
- if (unlikely(!coe))
+ if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb))
skb_checksum_none_assert(skb);
else
skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -5589,7 +5615,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
skb->protocol = eth_type_trans(skb, priv->dev);
- if (unlikely(!coe))
+ if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb))
skb_checksum_none_assert(skb);
else
skb->ip_summed = CHECKSUM_UNNECESSARY;
---
base-commit: a23aa04042187cbde16f470b49d4ad60d32e9206
change-id: 20240108-prevent_dsa_tags-7bb0def0db81
Best regards,
--
Romain Gantois <romain.gantois(a)bootlin.com>
The LPASS WSA macro codec driver is updating the digital gain settings
behind the back of user space on DAPM events if companding has been
enabled.
As compander control is exported to user space, this can result in the
digital gain setting being incremented (or decremented) every time the
sound server is started and the codec suspended depending on what the
UCM configuration looks like.
Soon enough playback will become distorted (or too quiet).
This is specifically a problem on the Lenovo ThinkPad X13s as this
bypasses the limit for the digital gain setting that has been set by the
machine driver.
Fix this by simply dropping the compander gain hack. If someone cares
about modelling the impact of the compander setting this can possibly be
done by exporting it as a volume control later.
Fixes: 2c4066e5d428 ("ASoC: codecs: lpass-wsa-macro: add dapm widgets and route")
Cc: stable(a)vger.kernel.org # 5.11
Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
sound/soc/codecs/lpass-wsa-macro.c | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c
index 7e21cec3c2fb..7de221464d47 100644
--- a/sound/soc/codecs/lpass-wsa-macro.c
+++ b/sound/soc/codecs/lpass-wsa-macro.c
@@ -1583,8 +1583,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
u16 gain_reg;
u16 reg;
- int val;
- int offset_val = 0;
struct wsa_macro *wsa = snd_soc_component_get_drvdata(component);
if (w->shift == WSA_MACRO_COMP1) {
@@ -1623,11 +1621,7 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
CDC_WSA_RX1_RX_PATH_MIX_SEC0,
CDC_WSA_RX_PGA_HALF_DB_MASK,
CDC_WSA_RX_PGA_HALF_DB_ENABLE);
- offset_val = -2;
}
- val = snd_soc_component_read(component, gain_reg);
- val += offset_val;
- snd_soc_component_write(component, gain_reg, val);
wsa_macro_config_ear_spkr_gain(component, wsa,
event, gain_reg);
break;
@@ -1654,10 +1648,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
CDC_WSA_RX1_RX_PATH_MIX_SEC0,
CDC_WSA_RX_PGA_HALF_DB_MASK,
CDC_WSA_RX_PGA_HALF_DB_DISABLE);
- offset_val = 2;
- val = snd_soc_component_read(component, gain_reg);
- val += offset_val;
- snd_soc_component_write(component, gain_reg, val);
}
wsa_macro_config_ear_spkr_gain(component, wsa,
event, gain_reg);
--
2.41.0
The LPASS WSA macro codec driver is updating the digital gain settings
behind the back of user space on DAPM events if companding has been
enabled.
As compander control is exported to user space, this can result in the
digital gain setting being incremented (or decremented) every time the
sound server is started and the codec suspended depending on what the
UCM configuration looks like.
Soon enough playback will become distorted (or too quiet).
This is specifically a problem on the Lenovo ThinkPad X13s as this
bypasses the limit for the digital gain setting that has been set by the
machine driver.
Fix this by simply dropping the compander gain offset hack. If someone
cares about modelling the impact of the compander setting this can
possibly be done by exporting it as a volume control later.
Note that the volume registers still need to be written after enabling
clocks in order for any prior updates to take effect.
Fixes: 2c4066e5d428 ("ASoC: codecs: lpass-wsa-macro: add dapm widgets and route")
Cc: stable(a)vger.kernel.org # 5.11
Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
sound/soc/codecs/lpass-wsa-macro.c | 7 -------
1 file changed, 7 deletions(-)
diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c
index 7e21cec3c2fb..6ce309980cd1 100644
--- a/sound/soc/codecs/lpass-wsa-macro.c
+++ b/sound/soc/codecs/lpass-wsa-macro.c
@@ -1584,7 +1584,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
u16 gain_reg;
u16 reg;
int val;
- int offset_val = 0;
struct wsa_macro *wsa = snd_soc_component_get_drvdata(component);
if (w->shift == WSA_MACRO_COMP1) {
@@ -1623,10 +1622,8 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
CDC_WSA_RX1_RX_PATH_MIX_SEC0,
CDC_WSA_RX_PGA_HALF_DB_MASK,
CDC_WSA_RX_PGA_HALF_DB_ENABLE);
- offset_val = -2;
}
val = snd_soc_component_read(component, gain_reg);
- val += offset_val;
snd_soc_component_write(component, gain_reg, val);
wsa_macro_config_ear_spkr_gain(component, wsa,
event, gain_reg);
@@ -1654,10 +1651,6 @@ static int wsa_macro_enable_interpolator(struct snd_soc_dapm_widget *w,
CDC_WSA_RX1_RX_PATH_MIX_SEC0,
CDC_WSA_RX_PGA_HALF_DB_MASK,
CDC_WSA_RX_PGA_HALF_DB_DISABLE);
- offset_val = 2;
- val = snd_soc_component_read(component, gain_reg);
- val += offset_val;
- snd_soc_component_write(component, gain_reg, val);
}
wsa_macro_config_ear_spkr_gain(component, wsa,
event, gain_reg);
--
2.41.0
The current UCM configuration sets the speaker PA volume to 15 dB when
enabling the speakers but this does not prevent the user from increasing
the volume further.
Limit the PA volume to 15 dB in the machine driver to reduce the risk of
speaker damage until we have active speaker protection in place.
Note that this will probably need to be generalised using
machine-specific limits, but a common limit should do for now.
Cc: stable(a)vger.kernel.org # 6.5
Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
sound/soc/qcom/sc8280xp.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c
index ed4bb551bfbb..aa43903421f5 100644
--- a/sound/soc/qcom/sc8280xp.c
+++ b/sound/soc/qcom/sc8280xp.c
@@ -32,12 +32,14 @@ static int sc8280xp_snd_init(struct snd_soc_pcm_runtime *rtd)
case WSA_CODEC_DMA_RX_0:
case WSA_CODEC_DMA_RX_1:
/*
- * set limit of 0dB on Digital Volume for Speakers,
- * this can prevent damage of speakers to some extent without
- * active speaker protection
+ * Set limit of 0 dB on Digital Volume and 15 dB on PA Volume
+ * to reduce the risk of speaker damage until we have active
+ * speaker protection in place.
*/
snd_soc_limit_volume(card, "WSA_RX0 Digital Volume", 84);
snd_soc_limit_volume(card, "WSA_RX1 Digital Volume", 84);
+ snd_soc_limit_volume(card, "SpkrLeft PA Volume", 12);
+ snd_soc_limit_volume(card, "SpkrRight PA Volume", 12);
break;
default:
break;
--
2.41.0
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
This reverts commit 88b065943cb583e890324d618e8d4b23460d51a3.
Lenovo 82TQ is unhappy if we do the display on sequence this
late. The display output shows severe corruption.
It's unclear if this is a failure on our part (perhaps
something to do with sending commands in LP mode after HS
/video mode transmission has been started? Though the backlight
on command at least seems to work) or simply that there are
some commands in the sequence that are needed to be done
earlier (eg. could be some DSC init stuff?). If the latter
then I don't think the current Windows code would work
either, but maybe this was originally tested with an older
driver, who knows.
Root causing this fully would likely require a lot of
experimentation which isn't really feasible without direct
access to the machine, so let's just accept failure and
go back to the original sequence.
Cc: stable(a)vger.kernel.org
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10071
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
---
drivers/gpu/drm/i915/display/icl_dsi.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index ac456a2275db..eda4a8b88590 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -1155,6 +1155,7 @@ static void gen11_dsi_powerup_panel(struct intel_encoder *encoder)
}
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_INIT_OTP);
+ intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
/* ensure all panel commands dispatched before enabling transcoder */
wait_for_cmds_dispatched_to_panel(encoder);
@@ -1255,8 +1256,6 @@ static void gen11_dsi_enable(struct intel_atomic_state *state,
/* step6d: enable dsi transcoder */
gen11_dsi_enable_transcoder(encoder);
- intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON);
-
/* step7: enable backlight */
intel_backlight_enable(crtc_state, conn_state);
intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON);
--
2.41.0
There's a warning in btrfs_issue_discard() when the range is not aligned
to 512 bytes, originally added in 4d89d377bbb0 ("btrfs:
btrfs_issue_discard ensure offset/length are aligned to sector
boundaries"). We can't do sub-sector writes anyway so the adjustment is
the only thing that we can do and the warning is unnecessary.
CC: stable(a)vger.kernel.org # 4.19+
Reported-by: syzbot+4a4f1eba14eb5c3417d1(a)syzkaller.appspotmail.com
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/extent-tree.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6d680031211a..8e8cc1111277 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1260,7 +1260,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
u64 bytes_left, end;
u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
- if (WARN_ON(start != aligned_start)) {
+ /* Adjust the range to be aligned to 512B sectors if necessary. */
+ if (start != aligned_start) {
len -= aligned_start - start;
len = round_down(len, 1 << SECTOR_SHIFT);
start = aligned_start;
--
2.42.1