The quilt patch titled
Subject: mm/shmem: fix race in shmem_undo_range w/THP
has been removed from the -mm tree. Its filename was
mm-shmem-fix-race-in-shmem_undo_range-w-thp.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: David Stevens <stevensd(a)chromium.org>
Subject: mm/shmem: fix race in shmem_undo_range w/THP
Date: Tue, 18 Apr 2023 17:40:31 +0900
Split folios during the second loop of shmem_undo_range. It's not
sufficient to only split folios when dealing with partial pages, since
it's possible for a THP to be faulted in after that point. Calling
truncate_inode_folio in that situation can result in throwing away data
outside of the range being targeted.
[akpm(a)linux-foundation.org: tidy up comment layout]
Link: https://lkml.kernel.org/r/20230418084031.3439795-1-stevensd@google.com
Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios")
Signed-off-by: David Stevens <stevensd(a)chromium.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Suleiman Souhlal <suleiman(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/shmem.c | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
--- a/mm/shmem.c~mm-shmem-fix-race-in-shmem_undo_range-w-thp
+++ a/mm/shmem.c
@@ -1080,7 +1080,24 @@ whole_folios:
}
VM_BUG_ON_FOLIO(folio_test_writeback(folio),
folio);
- truncate_inode_folio(mapping, folio);
+
+ if (!folio_test_large(folio)) {
+ truncate_inode_folio(mapping, folio);
+ } else if (truncate_inode_partial_folio(folio, lstart, lend)) {
+ /*
+ * If we split a page, reset the loop so
+ * that we pick up the new sub pages.
+ * Otherwise the THP was entirely
+ * dropped or the target range was
+ * zeroed, so just continue the loop as
+ * is.
+ */
+ if (!folio_test_large(folio)) {
+ folio_unlock(folio);
+ index = start;
+ break;
+ }
+ }
}
folio_unlock(folio);
}
_
Patches currently in -mm which might be from stevensd(a)chromium.org are
The quilt patch titled
Subject: Revert "selftests: error out if kernel header files are not yet built"
has been removed from the -mm tree. Its filename was
revert-selftests-error-out-if-kernel-header-files-are-not-yet-built.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: John Hubbard <jhubbard(a)nvidia.com>
Subject: Revert "selftests: error out if kernel header files are not yet built"
Date: Fri, 8 Dec 2023 18:01:44 -0800
This reverts commit 9fc96c7c19df ("selftests: error out if kernel header
files are not yet built").
It turns out that requiring the kernel headers to be built as a
prerequisite to building selftests, does not work in many cases. For
example, Peter Zijlstra writes:
"My biggest beef with the whole thing is that I simply do not want to use
'make headers', it doesn't work for me.
I have a ton of output directories and I don't care to build tools into
the output dirs, in fact some of them flat out refuse to work that way
(bpf comes to mind)." [1]
Therefore, stop erroring out on the selftests build. Additional patches
will be required in order to change over to not requiring the kernel
headers.
[1] https://lore.kernel.org/20231208221007.GO28727@noisy.programming.kicks-ass.…
Link: https://lkml.kernel.org/r/20231209020144.244759-1-jhubbard@nvidia.com
Fixes: 9fc96c7c19df ("selftests: error out if kernel header files are not yet built")
Signed-off-by: John Hubbard <jhubbard(a)nvidia.com>
Cc: Anders Roxell <anders.roxell(a)linaro.org>
Cc: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Nathan Chancellor <nathan(a)kernel.org>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Marcos Paulo de Souza <mpdesouza(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/Makefile | 21 ---------------
tools/testing/selftests/lib.mk | 40 ++---------------------------
2 files changed, 4 insertions(+), 57 deletions(-)
--- a/tools/testing/selftests/lib.mk~revert-selftests-error-out-if-kernel-header-files-are-not-yet-built
+++ a/tools/testing/selftests/lib.mk
@@ -44,26 +44,10 @@ endif
selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST))))
top_srcdir = $(selfdir)/../../..
-ifeq ("$(origin O)", "command line")
- KBUILD_OUTPUT := $(O)
+ifeq ($(KHDR_INCLUDES),)
+KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include
endif
-ifneq ($(KBUILD_OUTPUT),)
- # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot
- # expand a shell special character '~'. We use a somewhat tedious way here.
- abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd)
- $(if $(abs_objtree),, \
- $(error failed to create output directory "$(KBUILD_OUTPUT)"))
- # $(realpath ...) resolves symlinks
- abs_objtree := $(realpath $(abs_objtree))
- KHDR_DIR := ${abs_objtree}/usr/include
-else
- abs_srctree := $(shell cd $(top_srcdir) && pwd)
- KHDR_DIR := ${abs_srctree}/usr/include
-endif
-
-KHDR_INCLUDES := -isystem $(KHDR_DIR)
-
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
# custom build rule and prevent common build rule use.
@@ -74,25 +58,7 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)
TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
-all: kernel_header_files $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) \
- $(TEST_GEN_FILES)
-
-kernel_header_files:
- @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \
- if [ $$? -ne 0 ]; then \
- RED='\033[1;31m'; \
- NOCOLOR='\033[0m'; \
- echo; \
- echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \
- echo "Please run this and try again:"; \
- echo; \
- echo " cd $(top_srcdir)"; \
- echo " make headers"; \
- echo; \
- exit 1; \
- fi
-
-.PHONY: kernel_header_files
+all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
define RUN_TESTS
BASE_DIR="$(selfdir)"; \
--- a/tools/testing/selftests/Makefile~revert-selftests-error-out-if-kernel-header-files-are-not-yet-built
+++ a/tools/testing/selftests/Makefile
@@ -155,12 +155,10 @@ ifneq ($(KBUILD_OUTPUT),)
abs_objtree := $(realpath $(abs_objtree))
BUILD := $(abs_objtree)/kselftest
KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include
- KHDR_DIR := ${abs_objtree}/usr/include
else
BUILD := $(CURDIR)
abs_srctree := $(shell cd $(top_srcdir) && pwd)
KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include
- KHDR_DIR := ${abs_srctree}/usr/include
DEFAULT_INSTALL_HDR_PATH := 1
endif
@@ -174,7 +172,7 @@ export KHDR_INCLUDES
# all isn't the first target in the file.
.DEFAULT_GOAL := all
-all: kernel_header_files
+all:
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
@@ -185,23 +183,6 @@ all: kernel_header_files
ret=$$((ret * $$?)); \
done; exit $$ret;
-kernel_header_files:
- @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \
- if [ $$? -ne 0 ]; then \
- RED='\033[1;31m'; \
- NOCOLOR='\033[0m'; \
- echo; \
- echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \
- echo "Please run this and try again:"; \
- echo; \
- echo " cd $(top_srcdir)"; \
- echo " make headers"; \
- echo; \
- exit 1; \
- fi
-
-.PHONY: kernel_header_files
-
run_tests: all
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
_
Patches currently in -mm which might be from jhubbard(a)nvidia.com are
The quilt patch titled
Subject: mm/damon/core: make damon_start() waits until kdamond_fn() starts
has been removed from the -mm tree. Its filename was
mm-damon-core-make-damon_start-waits-until-kdamond_fn-starts.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/core: make damon_start() waits until kdamond_fn() starts
Date: Fri, 8 Dec 2023 17:50:18 +0000
The cleanup tasks of kdamond threads including reset of corresponding
DAMON context's ->kdamond field and decrease of global nr_running_ctxs
counter is supposed to be executed by kdamond_fn(). However, commit
0f91d13366a4 ("mm/damon: simplify stop mechanism") made neither
damon_start() nor damon_stop() ensure the corresponding kdamond has
started the execution of kdamond_fn().
As a result, the cleanup can be skipped if damon_stop() is called fast
enough after the previous damon_start(). Especially the skipped reset
of ->kdamond could cause a use-after-free.
Fix it by waiting for start of kdamond_fn() execution from
damon_start().
Link: https://lkml.kernel.org/r/20231208175018.63880-1-sj@kernel.org
Fixes: 0f91d13366a4 ("mm/damon: simplify stop mechanism")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Reported-by: Jakub Acs <acsjakub(a)amazon.de>
Cc: Changbin Du <changbin.du(a)intel.com>
Cc: Jakub Acs <acsjakub(a)amazon.de>
Cc: <stable(a)vger.kernel.org> # 5.15.x
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/damon.h | 2 ++
mm/damon/core.c | 6 ++++++
2 files changed, 8 insertions(+)
--- a/include/linux/damon.h~mm-damon-core-make-damon_start-waits-until-kdamond_fn-starts
+++ a/include/linux/damon.h
@@ -559,6 +559,8 @@ struct damon_ctx {
* update
*/
unsigned long next_ops_update_sis;
+ /* for waiting until the execution of the kdamond_fn is started */
+ struct completion kdamond_started;
/* public: */
struct task_struct *kdamond;
--- a/mm/damon/core.c~mm-damon-core-make-damon_start-waits-until-kdamond_fn-starts
+++ a/mm/damon/core.c
@@ -445,6 +445,8 @@ struct damon_ctx *damon_new_ctx(void)
if (!ctx)
return NULL;
+ init_completion(&ctx->kdamond_started);
+
ctx->attrs.sample_interval = 5 * 1000;
ctx->attrs.aggr_interval = 100 * 1000;
ctx->attrs.ops_update_interval = 60 * 1000 * 1000;
@@ -668,11 +670,14 @@ static int __damon_start(struct damon_ct
mutex_lock(&ctx->kdamond_lock);
if (!ctx->kdamond) {
err = 0;
+ reinit_completion(&ctx->kdamond_started);
ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
nr_running_ctxs);
if (IS_ERR(ctx->kdamond)) {
err = PTR_ERR(ctx->kdamond);
ctx->kdamond = NULL;
+ } else {
+ wait_for_completion(&ctx->kdamond_started);
}
}
mutex_unlock(&ctx->kdamond_lock);
@@ -1433,6 +1438,7 @@ static int kdamond_fn(void *data)
pr_debug("kdamond (%d) starts\n", current->pid);
+ complete(&ctx->kdamond_started);
kdamond_init_intervals_sis(ctx);
if (ctx->ops.init)
_
Patches currently in -mm which might be from sj(a)kernel.org are
selftests-damon-implement-a-python-module-for-test-purpose-damon-sysfs-controls.patch
selftests-damon-_damon_sysfs-implement-kdamonds-start-function.patch
selftests-damon-_damon_sysfs-implement-updat_schemes_tried_bytes-command.patch
selftests-damon-add-a-test-for-update_schemes_tried_regions-sysfs-command.patch
selftests-damon-add-a-test-for-update_schemes_tried_regions-hang-bug.patch
The quilt patch titled
Subject: kexec: drop dependency on ARCH_SUPPORTS_KEXEC from CRASH_DUMP
has been removed from the -mm tree. Its filename was
kexec-drop-dependency-on-arch_supports_kexec-from-crash_dump.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ignat Korchagin <ignat(a)cloudflare.com>
Subject: kexec: drop dependency on ARCH_SUPPORTS_KEXEC from CRASH_DUMP
Date: Wed, 29 Nov 2023 22:04:09 +0000
In commit f8ff23429c62 ("kernel/Kconfig.kexec: drop select of KEXEC for
CRASH_DUMP") we tried to fix a config regression, where CONFIG_CRASH_DUMP
required CONFIG_KEXEC.
However, it was not enough at least for arm64 platforms. While further
testing the patch with our arm64 config I noticed that CONFIG_CRASH_DUMP
is unavailable in menuconfig. This is because CONFIG_CRASH_DUMP still
depends on the new CONFIG_ARCH_SUPPORTS_KEXEC introduced in commit
91506f7e5d21 ("arm64/kexec: refactor for kernel/Kconfig.kexec") and on
arm64 CONFIG_ARCH_SUPPORTS_KEXEC requires CONFIG_PM_SLEEP_SMP=y, which in
turn requires either CONFIG_SUSPEND=y or CONFIG_HIBERNATION=y neither of
which are set in our config.
Given that we already established that CONFIG_KEXEC (which is a switch for
kexec system call itself) is not required for CONFIG_CRASH_DUMP drop
CONFIG_ARCH_SUPPORTS_KEXEC dependency as well. The arm64 kernel builds
just fine with CONFIG_CRASH_DUMP=y and with both CONFIG_KEXEC=n and
CONFIG_KEXEC_FILE=n after f8ff23429c62 ("kernel/Kconfig.kexec: drop select
of KEXEC for CRASH_DUMP") and this patch are applied given that the
necessary shared bits are included via CONFIG_KEXEC_CORE dependency.
[bhe(a)redhat.com: don't export some symbols when CONFIG_MMU=n]
Link: https://lkml.kernel.org/r/ZW03ODUKGGhP1ZGU@MiWiFi-R3L-srv
[bhe(a)redhat.com: riscv, kexec: fix dependency of two items]
Link: https://lkml.kernel.org/r/ZW04G/SKnhbE5mnX@MiWiFi-R3L-srv
Link: https://lkml.kernel.org/r/20231129220409.55006-1-ignat@cloudflare.com
Fixes: 91506f7e5d21 ("arm64/kexec: refactor for kernel/Kconfig.kexec")
Signed-off-by: Ignat Korchagin <ignat(a)cloudflare.com>
Signed-off-by: Baoquan He <bhe(a)redhat.com>
Acked-by: Baoquan He <bhe(a)redhat.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org> # 6.6+: f8ff234: kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP
Cc: <stable(a)vger.kernel.org> # 6.6+
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/riscv/Kconfig | 4 ++--
arch/riscv/kernel/crash_core.c | 4 +++-
kernel/Kconfig.kexec | 1 -
3 files changed, 5 insertions(+), 4 deletions(-)
--- a/arch/riscv/Kconfig~kexec-drop-dependency-on-arch_supports_kexec-from-crash_dump
+++ a/arch/riscv/Kconfig
@@ -685,7 +685,7 @@ config RISCV_BOOT_SPINWAIT
If unsure what to do here, say N.
config ARCH_SUPPORTS_KEXEC
- def_bool MMU
+ def_bool y
config ARCH_SELECTS_KEXEC
def_bool y
@@ -693,7 +693,7 @@ config ARCH_SELECTS_KEXEC
select HOTPLUG_CPU if SMP
config ARCH_SUPPORTS_KEXEC_FILE
- def_bool 64BIT && MMU
+ def_bool 64BIT
config ARCH_SELECTS_KEXEC_FILE
def_bool y
--- a/arch/riscv/kernel/crash_core.c~kexec-drop-dependency-on-arch_supports_kexec-from-crash_dump
+++ a/arch/riscv/kernel/crash_core.c
@@ -5,18 +5,20 @@
void arch_crash_save_vmcoreinfo(void)
{
- VMCOREINFO_NUMBER(VA_BITS);
VMCOREINFO_NUMBER(phys_ram_base);
vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET);
vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
+#ifdef CONFIG_MMU
+ VMCOREINFO_NUMBER(VA_BITS);
vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
#ifdef CONFIG_64BIT
vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
#endif
+#endif
vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR);
vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n",
kernel_map.va_kernel_pa_offset);
--- a/kernel/Kconfig.kexec~kexec-drop-dependency-on-arch_supports_kexec-from-crash_dump
+++ a/kernel/Kconfig.kexec
@@ -94,7 +94,6 @@ config KEXEC_JUMP
config CRASH_DUMP
bool "kernel crash dumps"
depends on ARCH_SUPPORTS_CRASH_DUMP
- depends on ARCH_SUPPORTS_KEXEC
select CRASH_CORE
select KEXEC_CORE
help
_
Patches currently in -mm which might be from ignat(a)cloudflare.com are
When a queue is unbound from the vfio_ap device driver, if that queue is
assigned to a guest's AP configuration, its associated adapter is removed
because queues are defined to a guest via a matrix of adapters and
domains; so, it is not possible to remove a single queue.
If an adapter is removed from the guest's AP configuration, all associated
queues must be reset to prevent leaking crypto data should any of them be
assigned to a different guest or device driver. The one caveat is that if
the queue is being removed because the adapter or domain has been removed
from the host's AP configuration, then an attempt to reset the queue will
fail with response code 01, AP-queue number not valid; so resetting these
queues should be skipped.
Acked-by: Halil Pasic <pasic(a)linux.ibm.com>
Signed-off-by: Tony Krowiak <akrowiak(a)linux.ibm.com>
Fixes: 09d31ff78793 ("s390/vfio-ap: hot plug/unplug of AP devices when probed/removed")
Cc: <stable(a)vger.kernel.org>
---
drivers/s390/crypto/vfio_ap_ops.c | 78 ++++++++++++++++---------------
1 file changed, 41 insertions(+), 37 deletions(-)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 11f8f0bcc7ed..e014108067dc 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -935,45 +935,45 @@ static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev,
AP_MKQID(apid, apqi));
}
+static void collect_queues_to_reset(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apid,
+ struct list_head *qlist)
+{
+ struct vfio_ap_queue *q;
+ unsigned long apqi;
+
+ for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS) {
+ q = vfio_ap_mdev_get_queue(matrix_mdev, AP_MKQID(apid, apqi));
+ if (q)
+ list_add_tail(&q->reset_qnode, qlist);
+ }
+}
+
+static void reset_queues_for_apid(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long apid)
+{
+ struct list_head qlist;
+
+ INIT_LIST_HEAD(&qlist);
+ collect_queues_to_reset(matrix_mdev, apid, &qlist);
+ vfio_ap_mdev_reset_qlist(&qlist);
+}
+
static int reset_queues_for_apids(struct ap_matrix_mdev *matrix_mdev,
unsigned long *apm_reset)
{
- struct vfio_ap_queue *q, *tmpq;
struct list_head qlist;
- unsigned long apid, apqi;
- int apqn, ret = 0;
+ unsigned long apid;
if (bitmap_empty(apm_reset, AP_DEVICES))
return 0;
INIT_LIST_HEAD(&qlist);
- for_each_set_bit_inv(apid, apm_reset, AP_DEVICES) {
- for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm,
- AP_DOMAINS) {
- /*
- * If the domain is not in the host's AP configuration,
- * then resetting it will fail with response code 01
- * (APQN not valid).
- */
- if (!test_bit_inv(apqi,
- (unsigned long *)matrix_dev->info.aqm))
- continue;
-
- apqn = AP_MKQID(apid, apqi);
- q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
-
- if (q)
- list_add_tail(&q->reset_qnode, &qlist);
- }
- }
+ for_each_set_bit_inv(apid, apm_reset, AP_DEVICES)
+ collect_queues_to_reset(matrix_mdev, apid, &qlist);
- ret = vfio_ap_mdev_reset_qlist(&qlist);
-
- list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode)
- list_del(&q->reset_qnode);
-
- return ret;
+ return vfio_ap_mdev_reset_qlist(&qlist);
}
/**
@@ -2199,24 +2199,28 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
matrix_mdev = q->matrix_mdev;
if (matrix_mdev) {
- vfio_ap_unlink_queue_fr_mdev(q);
-
- apid = AP_QID_CARD(q->apqn);
- apqi = AP_QID_QUEUE(q->apqn);
-
- /*
- * If the queue is assigned to the guest's APCB, then remove
- * the adapter's APID from the APCB and hot it into the guest.
- */
+ /* If the queue is assigned to the guest's AP configuration */
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
+ /*
+ * Since the queues are defined via a matrix of adapters
+ * and domains, it is not possible to hot unplug a
+ * single queue; so, let's unplug the adapter.
+ */
clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apid(matrix_mdev, apid);
+ goto done;
}
}
vfio_ap_mdev_reset_queue(q);
flush_work(&q->reset_work);
+
+done:
+ if (matrix_mdev)
+ vfio_ap_unlink_queue_fr_mdev(q);
+
dev_set_drvdata(&apdev->device, NULL);
kfree(q);
release_update_locks_for_mdev(matrix_mdev);
--
2.43.0
When filtering the adapters from the configuration profile for a guest to
create or update a guest's AP configuration, if the APID of an adapter and
the APQI of a domain identify a queue device that is not bound to the
vfio_ap device driver, the APID of the adapter will be filtered because an
individual APQN can not be filtered due to the fact the APQNs are assigned
to an AP configuration as a matrix of APIDs and APQIs. Consequently, a
guest will not have access to all of the queues associated with the
filtered adapter. If the queues are subsequently made available again to
the guest, they should re-appear in a reset state; so, let's make sure all
queues associated with an adapter unplugged from the guest are reset.
In order to identify the set of queues that need to be reset, let's allow a
vfio_ap_queue object to be simultaneously stored in both a hashtable and a
list: A hashtable used to store all of the queues assigned
to a matrix mdev; and/or, a list used to store a subset of the queues that
need to be reset. For example, when an adapter is hot unplugged from a
guest, all guest queues associated with that adapter must be reset. Since
that may be a subset of those assigned to the matrix mdev, they can be
stored in a list that can be passed to the vfio_ap_mdev_reset_queues
function.
Signed-off-by: Tony Krowiak <akrowiak(a)linux.ibm.com>
Acked-by: Halil Pasic <pasic(a)linux.ibm.com>
Fixes: 48cae940c31d ("s390/vfio-ap: refresh guest's APCB by filtering AP resources assigned to mdev")
Cc: <stable(a)vger.kernel.org>
---
drivers/s390/crypto/vfio_ap_ops.c | 171 +++++++++++++++++++-------
drivers/s390/crypto/vfio_ap_private.h | 11 +-
2 files changed, 133 insertions(+), 49 deletions(-)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 26bd4aca497a..11f8f0bcc7ed 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -32,7 +32,8 @@
#define AP_RESET_INTERVAL 20 /* Reset sleep interval (20ms) */
-static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
+static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
+static int vfio_ap_mdev_reset_qlist(struct list_head *qlist);
static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
@@ -661,16 +662,23 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev)
* device driver.
*
* @matrix_mdev: the matrix mdev whose matrix is to be filtered.
+ * @apm_filtered: a 256-bit bitmap for storing the APIDs filtered from the
+ * guest's AP configuration that are still in the host's AP
+ * configuration.
*
* Note: If an APQN referencing a queue device that is not bound to the vfio_ap
* driver, its APID will be filtered from the guest's APCB. The matrix
* structure precludes filtering an individual APQN, so its APID will be
- * filtered.
+ * filtered. Consequently, all queues associated with the adapter that
+ * are in the host's AP configuration must be reset. If queues are
+ * subsequently made available again to the guest, they should re-appear
+ * in a reset state
*
* Return: a boolean value indicating whether the KVM guest's APCB was changed
* by the filtering or not.
*/
-static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
+static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long *apm_filtered)
{
unsigned long apid, apqi, apqn;
DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES);
@@ -680,6 +688,7 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES);
bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS);
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
+ bitmap_clear(apm_filtered, 0, AP_DEVICES);
/*
* Copy the adapters, domains and control domains to the shadow_apcb
@@ -705,8 +714,16 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
apqn = AP_MKQID(apid, apqi);
q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
if (!q || q->reset_status.response_code) {
- clear_bit_inv(apid,
- matrix_mdev->shadow_apcb.apm);
+ clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
+
+ /*
+ * If the adapter was previously plugged into
+ * the guest, let's let the caller know that
+ * the APID was filtered.
+ */
+ if (test_bit_inv(apid, prev_shadow_apm))
+ set_bit_inv(apid, apm_filtered);
+
break;
}
}
@@ -808,7 +825,7 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev)
mutex_lock(&matrix_dev->guests_lock);
mutex_lock(&matrix_dev->mdevs_lock);
- vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
+ vfio_ap_mdev_reset_queues(matrix_mdev);
vfio_ap_mdev_unlink_fr_queues(matrix_mdev);
list_del(&matrix_mdev->node);
mutex_unlock(&matrix_dev->mdevs_lock);
@@ -918,6 +935,47 @@ static void vfio_ap_mdev_link_adapter(struct ap_matrix_mdev *matrix_mdev,
AP_MKQID(apid, apqi));
}
+static int reset_queues_for_apids(struct ap_matrix_mdev *matrix_mdev,
+ unsigned long *apm_reset)
+{
+ struct vfio_ap_queue *q, *tmpq;
+ struct list_head qlist;
+ unsigned long apid, apqi;
+ int apqn, ret = 0;
+
+ if (bitmap_empty(apm_reset, AP_DEVICES))
+ return 0;
+
+ INIT_LIST_HEAD(&qlist);
+
+ for_each_set_bit_inv(apid, apm_reset, AP_DEVICES) {
+ for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm,
+ AP_DOMAINS) {
+ /*
+ * If the domain is not in the host's AP configuration,
+ * then resetting it will fail with response code 01
+ * (APQN not valid).
+ */
+ if (!test_bit_inv(apqi,
+ (unsigned long *)matrix_dev->info.aqm))
+ continue;
+
+ apqn = AP_MKQID(apid, apqi);
+ q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
+
+ if (q)
+ list_add_tail(&q->reset_qnode, &qlist);
+ }
+ }
+
+ ret = vfio_ap_mdev_reset_qlist(&qlist);
+
+ list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode)
+ list_del(&q->reset_qnode);
+
+ return ret;
+}
+
/**
* assign_adapter_store - parses the APID from @buf and sets the
* corresponding bit in the mediated matrix device's APM
@@ -958,6 +1016,7 @@ static ssize_t assign_adapter_store(struct device *dev,
{
int ret;
unsigned long apid;
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
mutex_lock(&ap_perms_mutex);
@@ -987,8 +1046,10 @@ static ssize_t assign_adapter_store(struct device *dev,
vfio_ap_mdev_link_adapter(matrix_mdev, apid);
- if (vfio_ap_mdev_filter_matrix(matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+ }
ret = count;
done:
@@ -1019,11 +1080,12 @@ static struct vfio_ap_queue
* adapter was assigned.
* @matrix_mdev: the matrix mediated device to which the adapter was assigned.
* @apid: the APID of the unassigned adapter.
- * @qtable: table for storing queues associated with unassigned adapter.
+ * @qlist: list for storing queues associated with unassigned adapter that
+ * need to be reset.
*/
static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid,
- struct ap_queue_table *qtable)
+ struct list_head *qlist)
{
unsigned long apqi;
struct vfio_ap_queue *q;
@@ -1031,11 +1093,10 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) {
q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi);
- if (q && qtable) {
+ if (q && qlist) {
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
- hash_add(qtable->queues, &q->mdev_qnode,
- q->apqn);
+ list_add_tail(&q->reset_qnode, qlist);
}
}
}
@@ -1043,26 +1104,23 @@ static void vfio_ap_mdev_unlink_adapter(struct ap_matrix_mdev *matrix_mdev,
static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev,
unsigned long apid)
{
- int loop_cursor;
- struct vfio_ap_queue *q;
- struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL);
+ struct vfio_ap_queue *q, *tmpq;
+ struct list_head qlist;
- hash_init(qtable->queues);
- vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, qtable);
+ INIT_LIST_HEAD(&qlist);
+ vfio_ap_mdev_unlink_adapter(matrix_mdev, apid, &qlist);
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) {
clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
}
- vfio_ap_mdev_reset_queues(qtable);
+ vfio_ap_mdev_reset_qlist(&qlist);
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) {
vfio_ap_unlink_mdev_fr_queue(q);
- hash_del(&q->mdev_qnode);
+ list_del(&q->reset_qnode);
}
-
- kfree(qtable);
}
/**
@@ -1163,6 +1221,7 @@ static ssize_t assign_domain_store(struct device *dev,
{
int ret;
unsigned long apqi;
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
mutex_lock(&ap_perms_mutex);
@@ -1192,8 +1251,10 @@ static ssize_t assign_domain_store(struct device *dev,
vfio_ap_mdev_link_domain(matrix_mdev, apqi);
- if (vfio_ap_mdev_filter_matrix(matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+ }
ret = count;
done:
@@ -1206,7 +1267,7 @@ static DEVICE_ATTR_WO(assign_domain);
static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
unsigned long apqi,
- struct ap_queue_table *qtable)
+ struct list_head *qlist)
{
unsigned long apid;
struct vfio_ap_queue *q;
@@ -1214,11 +1275,10 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) {
q = vfio_ap_unlink_apqn_fr_mdev(matrix_mdev, apid, apqi);
- if (q && qtable) {
+ if (q && qlist) {
if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm) &&
test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm))
- hash_add(qtable->queues, &q->mdev_qnode,
- q->apqn);
+ list_add_tail(&q->reset_qnode, qlist);
}
}
}
@@ -1226,26 +1286,23 @@ static void vfio_ap_mdev_unlink_domain(struct ap_matrix_mdev *matrix_mdev,
static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev,
unsigned long apqi)
{
- int loop_cursor;
- struct vfio_ap_queue *q;
- struct ap_queue_table *qtable = kzalloc(sizeof(*qtable), GFP_KERNEL);
+ struct vfio_ap_queue *q, *tmpq;
+ struct list_head qlist;
- hash_init(qtable->queues);
- vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, qtable);
+ INIT_LIST_HEAD(&qlist);
+ vfio_ap_mdev_unlink_domain(matrix_mdev, apqi, &qlist);
if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) {
clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm);
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
}
- vfio_ap_mdev_reset_queues(qtable);
+ vfio_ap_mdev_reset_qlist(&qlist);
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ list_for_each_entry_safe(q, tmpq, &qlist, reset_qnode) {
vfio_ap_unlink_mdev_fr_queue(q);
- hash_del(&q->mdev_qnode);
+ list_del(&q->reset_qnode);
}
-
- kfree(qtable);
}
/**
@@ -1600,7 +1657,7 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
get_update_locks_for_kvm(kvm);
kvm_arch_crypto_clear_masks(kvm);
- vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
+ vfio_ap_mdev_reset_queues(matrix_mdev);
kvm_put_kvm(kvm);
matrix_mdev->kvm = NULL;
@@ -1736,15 +1793,33 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
}
}
-static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
+static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
{
int ret = 0, loop_cursor;
struct vfio_ap_queue *q;
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode)
+ hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode)
vfio_ap_mdev_reset_queue(q);
- hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+ hash_for_each(matrix_mdev->qtable.queues, loop_cursor, q, mdev_qnode) {
+ flush_work(&q->reset_work);
+
+ if (q->reset_status.response_code)
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
+static int vfio_ap_mdev_reset_qlist(struct list_head *qlist)
+{
+ int ret = 0;
+ struct vfio_ap_queue *q;
+
+ list_for_each_entry(q, qlist, reset_qnode)
+ vfio_ap_mdev_reset_queue(q);
+
+ list_for_each_entry(q, qlist, reset_qnode) {
flush_work(&q->reset_work);
if (q->reset_status.response_code)
@@ -1930,7 +2005,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
ret = vfio_ap_mdev_get_device_info(arg);
break;
case VFIO_DEVICE_RESET:
- ret = vfio_ap_mdev_reset_queues(&matrix_mdev->qtable);
+ ret = vfio_ap_mdev_reset_queues(matrix_mdev);
break;
case VFIO_DEVICE_GET_IRQ_INFO:
ret = vfio_ap_get_irq_info(arg);
@@ -2062,6 +2137,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
{
int ret;
struct vfio_ap_queue *q;
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev;
ret = sysfs_create_group(&apdev->device.kobj, &vfio_queue_attr_group);
@@ -2094,15 +2170,17 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
!bitmap_empty(matrix_mdev->aqm_add, AP_DOMAINS))
goto done;
- if (vfio_ap_mdev_filter_matrix(matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered)) {
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+ }
}
done:
dev_set_drvdata(&apdev->device, q);
release_update_locks_for_mdev(matrix_mdev);
- return 0;
+ return ret;
err_remove_group:
sysfs_remove_group(&apdev->device.kobj, &vfio_queue_attr_group);
@@ -2446,6 +2524,7 @@ void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info,
static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
{
+ DECLARE_BITMAP(apm_filtered, AP_DEVICES);
bool filter_domains, filter_adapters, filter_cdoms, do_hotplug = false;
mutex_lock(&matrix_mdev->kvm->lock);
@@ -2459,7 +2538,7 @@ static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
matrix_mdev->adm_add, AP_DOMAINS);
if (filter_adapters || filter_domains)
- do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev);
+ do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev, apm_filtered);
if (filter_cdoms)
do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev);
@@ -2467,6 +2546,8 @@ static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
if (do_hotplug)
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
+ reset_queues_for_apids(matrix_mdev, apm_filtered);
+
mutex_unlock(&matrix_dev->mdevs_lock);
mutex_unlock(&matrix_mdev->kvm->lock);
}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 88aff8b81f2f..20eac8b0f0b9 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -83,10 +83,10 @@ struct ap_matrix {
};
/**
- * struct ap_queue_table - a table of queue objects.
- *
- * @queues: a hashtable of queues (struct vfio_ap_queue).
- */
+ * struct ap_queue_table - a table of queue objects.
+ *
+ * @queues: a hashtable of queues (struct vfio_ap_queue).
+ */
struct ap_queue_table {
DECLARE_HASHTABLE(queues, 8);
};
@@ -133,6 +133,8 @@ struct ap_matrix_mdev {
* @apqn: the APQN of the AP queue device
* @saved_isc: the guest ISC registered with the GIB interface
* @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
+ * @reset_qnode: allows the vfio_ap_queue struct to be added to a list of queues
+ * that need to be reset
* @reset_status: the status from the last reset of the queue
* @reset_work: work to wait for queue reset to complete
*/
@@ -143,6 +145,7 @@ struct vfio_ap_queue {
#define VFIO_AP_ISC_INVALID 0xff
unsigned char saved_isc;
struct hlist_node mdev_qnode;
+ struct list_head reset_qnode;
struct ap_queue_status reset_status;
struct work_struct reset_work;
};
--
2.43.0
While filtering the mdev matrix, it doesn't make sense - and will have
unexpected results - to filter an APID from the matrix if the APID or one
of the associated APQIs is not in the host's AP configuration. There are
two reasons for this:
1. An adapter or domain that is not in the host's AP configuration can be
assigned to the matrix; this is known as over-provisioning. Queue
devices, however, are only created for adapters and domains in the
host's AP configuration, so there will be no queues associated with an
over-provisioned adapter or domain to filter.
2. The adapter or domain may have been externally removed from the host's
configuration via an SE or HMC attached to a DPM enabled LPAR. In this
case, the vfio_ap device driver would have been notified by the AP bus
via the on_config_changed callback and the adapter or domain would
have already been filtered.
Since the matrix_mdev->shadow_apcb.apm and matrix_mdev->shadow_apcb.aqm are
copied from the mdev matrix sans the APIDs and APQIs not in the host's AP
configuration, let's loop over those bitmaps instead of those assigned to
the matrix.
Signed-off-by: Tony Krowiak <akrowiak(a)linux.ibm.com>
Reviewed-by: Halil Pasic <pasic(a)linux.ibm.com>
Fixes: 48cae940c31d ("s390/vfio-ap: refresh guest's APCB by filtering AP resources assigned to mdev")
Cc: <stable(a)vger.kernel.org>
---
drivers/s390/crypto/vfio_ap_ops.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 9382b32e5bd1..47232e19a50e 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -691,8 +691,9 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm,
(unsigned long *)matrix_dev->info.aqm, AP_DOMAINS);
- for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) {
- for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) {
+ for_each_set_bit_inv(apid, matrix_mdev->shadow_apcb.apm, AP_DEVICES) {
+ for_each_set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm,
+ AP_DOMAINS) {
/*
* If the APQN is not bound to the vfio_ap device
* driver, then we can't assign it to the guest's
--
2.43.0
The vfio_ap_mdev_filter_matrix function is called whenever a new adapter or
domain is assigned to the mdev. The purpose of the function is to update
the guest's AP configuration by filtering the matrix of adapters and
domains assigned to the mdev. When an adapter or domain is assigned, only
the APQNs associated with the APID of the new adapter or APQI of the new
domain are inspected. If an APQN does not reference a queue device bound to
the vfio_ap device driver, then it's APID will be filtered from the mdev's
matrix when updating the guest's AP configuration.
Inspecting only the APID of the new adapter or APQI of the new domain will
result in passing AP queues through to a guest that are not bound to the
vfio_ap device driver under certain circumstances. Consider the following:
guest's AP configuration (all also assigned to the mdev's matrix):
14.0004
14.0005
14.0006
16.0004
16.0005
16.0006
unassign domain 4
unbind queue 16.0005
assign domain 4
When domain 4 is re-assigned, since only domain 4 will be inspected, the
APQNs that will be examined will be:
14.0004
16.0004
Since both of those APQNs reference queue devices that are bound to the
vfio_ap device driver, nothing will get filtered from the mdev's matrix
when updating the guest's AP configuration. Consequently, queue 16.0005
will get passed through despite not being bound to the driver. This
violates the linux device model requirement that a guest shall only be
given access to devices bound to the device driver facilitating their
pass-through.
To resolve this problem, every adapter and domain assigned to the mdev will
be inspected when filtering the mdev's matrix.
Signed-off-by: Tony Krowiak <akrowiak(a)linux.ibm.com>
Acked-by: Halil Pasic <pasic(a)linux.ibm.com>
Fixes: 48cae940c31d ("s390/vfio-ap: refresh guest's APCB by filtering AP resources assigned to mdev")
Cc: <stable(a)vger.kernel.org>
---
drivers/s390/crypto/vfio_ap_ops.c | 57 +++++++++----------------------
1 file changed, 17 insertions(+), 40 deletions(-)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 4db538a55192..9382b32e5bd1 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -670,8 +670,7 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev)
* Return: a boolean value indicating whether the KVM guest's APCB was changed
* by the filtering or not.
*/
-static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
- struct ap_matrix_mdev *matrix_mdev)
+static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev)
{
unsigned long apid, apqi, apqn;
DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES);
@@ -692,8 +691,8 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
bitmap_and(matrix_mdev->shadow_apcb.aqm, matrix_mdev->matrix.aqm,
(unsigned long *)matrix_dev->info.aqm, AP_DOMAINS);
- for_each_set_bit_inv(apid, apm, AP_DEVICES) {
- for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) {
+ for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, AP_DEVICES) {
+ for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, AP_DOMAINS) {
/*
* If the APQN is not bound to the vfio_ap device
* driver, then we can't assign it to the guest's
@@ -958,7 +957,6 @@ static ssize_t assign_adapter_store(struct device *dev,
{
int ret;
unsigned long apid;
- DECLARE_BITMAP(apm_delta, AP_DEVICES);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
mutex_lock(&ap_perms_mutex);
@@ -987,11 +985,8 @@ static ssize_t assign_adapter_store(struct device *dev,
}
vfio_ap_mdev_link_adapter(matrix_mdev, apid);
- memset(apm_delta, 0, sizeof(apm_delta));
- set_bit_inv(apid, apm_delta);
- if (vfio_ap_mdev_filter_matrix(apm_delta,
- matrix_mdev->matrix.aqm, matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev))
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
ret = count;
@@ -1167,7 +1162,6 @@ static ssize_t assign_domain_store(struct device *dev,
{
int ret;
unsigned long apqi;
- DECLARE_BITMAP(aqm_delta, AP_DOMAINS);
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
mutex_lock(&ap_perms_mutex);
@@ -1196,11 +1190,8 @@ static ssize_t assign_domain_store(struct device *dev,
}
vfio_ap_mdev_link_domain(matrix_mdev, apqi);
- memset(aqm_delta, 0, sizeof(aqm_delta));
- set_bit_inv(apqi, aqm_delta);
- if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, aqm_delta,
- matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev))
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
ret = count;
@@ -2091,9 +2082,7 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
if (matrix_mdev) {
vfio_ap_mdev_link_queue(matrix_mdev, q);
- if (vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm,
- matrix_mdev->matrix.aqm,
- matrix_mdev))
+ if (vfio_ap_mdev_filter_matrix(matrix_mdev))
vfio_ap_mdev_update_guest_apcb(matrix_mdev);
}
dev_set_drvdata(&apdev->device, q);
@@ -2443,34 +2432,22 @@ void vfio_ap_on_cfg_changed(struct ap_config_info *cur_cfg_info,
static void vfio_ap_mdev_hot_plug_cfg(struct ap_matrix_mdev *matrix_mdev)
{
- bool do_hotplug = false;
- int filter_domains = 0;
- int filter_adapters = 0;
- DECLARE_BITMAP(apm, AP_DEVICES);
- DECLARE_BITMAP(aqm, AP_DOMAINS);
+ bool filter_domains, filter_adapters, filter_cdoms, do_hotplug = false;
mutex_lock(&matrix_mdev->kvm->lock);
mutex_lock(&matrix_dev->mdevs_lock);
- filter_adapters = bitmap_and(apm, matrix_mdev->matrix.apm,
- matrix_mdev->apm_add, AP_DEVICES);
- filter_domains = bitmap_and(aqm, matrix_mdev->matrix.aqm,
- matrix_mdev->aqm_add, AP_DOMAINS);
-
- if (filter_adapters && filter_domains)
- do_hotplug |= vfio_ap_mdev_filter_matrix(apm, aqm, matrix_mdev);
- else if (filter_adapters)
- do_hotplug |=
- vfio_ap_mdev_filter_matrix(apm,
- matrix_mdev->shadow_apcb.aqm,
- matrix_mdev);
- else
- do_hotplug |=
- vfio_ap_mdev_filter_matrix(matrix_mdev->shadow_apcb.apm,
- aqm, matrix_mdev);
+ filter_adapters = bitmap_intersects(matrix_mdev->matrix.apm,
+ matrix_mdev->apm_add, AP_DEVICES);
+ filter_domains = bitmap_intersects(matrix_mdev->matrix.aqm,
+ matrix_mdev->aqm_add, AP_DOMAINS);
+ filter_cdoms = bitmap_intersects(matrix_mdev->matrix.adm,
+ matrix_mdev->adm_add, AP_DOMAINS);
+
+ if (filter_adapters || filter_domains)
+ do_hotplug = vfio_ap_mdev_filter_matrix(matrix_mdev);
- if (bitmap_intersects(matrix_mdev->matrix.adm, matrix_mdev->adm_add,
- AP_DOMAINS))
+ if (filter_cdoms)
do_hotplug |= vfio_ap_mdev_filter_cdoms(matrix_mdev);
if (do_hotplug)
--
2.43.0