The first thing the active retirement worker does is decrement the
i915_active count.
The first thing we do during i915_active_wait is try to increment the
i915_active count, but only if it is already active (non-zero).
The waiter may thus observe that retirement has already started and has
marked the i915_active as idle, and so skip waiting for the retirement
handler.
However, the caller of i915_active_wait may immediately free the
i915_active upon returning (e.g. i915_vma_destroy), so we must not
return before all concurrent accesses from the worker have completed.
We must always flush the worker.
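To make the ordering concrete, here is a minimal userspace sketch of the
race, with a pthread standing in for the kernel workqueue (hypothetical
names, not the actual i915 code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct active {
	atomic_int count;	/* analogue of the i915_active count */
	int stats;		/* state the worker touches after going idle */
};

static void *retire_worker(void *arg)
{
	struct active *ref = arg;

	atomic_store(&ref->count, 0);	/* worker marks the object idle first */
	ref->stats++;			/* ... but keeps accessing it afterwards */
	return NULL;
}

int main(void)
{
	struct active *ref = calloc(1, sizeof(*ref));
	pthread_t worker;

	atomic_store(&ref->count, 1);
	pthread_create(&worker, NULL, retire_worker, ref);

	while (atomic_load(&ref->count))	/* waiter observes "idle" ... */
		;
	pthread_join(worker, NULL);	/* the analogue of flush_work(); without
					 * it, free() races with ref->stats++ */
	free(ref);
	return 0;
}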
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2473
Fixes: 274cbf20fd10 ("drm/i915: Push the i915_active.retire into a worker")
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v5.5+
---
drivers/gpu/drm/i915/i915_active.c | 28 +++++++++++++++-------------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index ab4382841c6b..3bc616cc1ad2 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -628,24 +628,26 @@ static int flush_lazy_signals(struct i915_active *ref)
int __i915_active_wait(struct i915_active *ref, int state)
{
- int err;
-
might_sleep();
- if (!i915_active_acquire_if_busy(ref))
- return 0;
-
/* Any fence added after the wait begins will not be auto-signaled */
- err = flush_lazy_signals(ref);
- i915_active_release(ref);
- if (err)
- return err;
+ if (i915_active_acquire_if_busy(ref)) {
+ int err;
- if (!i915_active_is_idle(ref) &&
- ___wait_var_event(ref, i915_active_is_idle(ref),
- state, 0, 0, schedule()))
- return -EINTR;
+ err = flush_lazy_signals(ref);
+ i915_active_release(ref);
+ if (err)
+ return err;
+ if (___wait_var_event(ref, i915_active_is_idle(ref),
+ state, 0, 0, schedule()))
+ return -EINTR;
+ }
+
+ /*
+ * After the wait is complete, the caller may free the active.
+ * We have to flush any concurrent retirement before returning.
+ */
flush_work(&ref->work);
return 0;
}
--
2.20.1
The vfio_ap device driver registers a group notifier with VFIO when the
file descriptor for a VFIO mediated device for a KVM guest is opened, in
order to receive notification that the KVM pointer is set (the
VFIO_GROUP_NOTIFY_SET_KVM event). When the KVM pointer is set, the
vfio_ap driver takes the following actions:
1. Stashes the KVM pointer in the vfio_ap_mdev struct that holds the state
of the mediated device.
2. Calls the kvm_get_kvm() function to increment its reference counter.
3. Sets the function pointer to the function that handles interception of
the instruction that enables/disables interrupt processing.
4. Sets the masks in the KVM guest's CRYCB to pass AP resources through to
the guest.
To avoid memory leaks, when the notifier is invoked because the KVM
pointer has been set to NULL, the vfio_ap device driver should reverse
the actions taken when the KVM pointer was set.
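As a rough illustration of the required symmetry, here is a userspace
sketch of the attach/detach pairing (hypothetical names; plain
refcounting stands in for kvm_get_kvm()/kvm_put_kvm(), and the CRYCB
mask handling is elided):

#include <stdatomic.h>
#include <stdlib.h>

struct kvm {
	atomic_int refcount;
	void (*pqap_hook)(void);
};

struct matrix_mdev {
	struct kvm *kvm;
	void (*hook)(void);
};

static void set_kvm(struct matrix_mdev *m, struct kvm *kvm)
{
	m->kvm = kvm;				/* 1. stash the pointer */
	atomic_fetch_add(&kvm->refcount, 1);	/* 2. take a reference */
	kvm->pqap_hook = m->hook;		/* 3. install the hook */
	/* 4. set the CRYCB masks (elided) */
}

static void unset_kvm(struct matrix_mdev *m)
{
	/* 4. clear the CRYCB masks (elided) */
	m->kvm->pqap_hook = NULL;		/* 3. remove the hook */
	if (atomic_fetch_sub(&m->kvm->refcount, 1) == 1)
		free(m->kvm);			/* 2. drop the reference */
	m->kvm = NULL;				/* 1. forget the pointer */
}

int main(void)
{
	struct kvm *kvm = calloc(1, sizeof(*kvm));
	struct matrix_mdev mdev = { 0 };

	set_kvm(&mdev, kvm);
	unset_kvm(&mdev);	/* frees kvm: refcount dropped to zero */
	return 0;
}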
Fixes: 258287c994de ("s390: vfio-ap: implement mediated device open callback")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tony Krowiak <akrowiak(a)linux.ibm.com>
Reviewed-by: Halil Pasic <pasic(a)linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
---
drivers/s390/crypto/vfio_ap_ops.c | 49 ++++++++++++++++++-------------
1 file changed, 28 insertions(+), 21 deletions(-)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index e0bde8518745..7339043906cf 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1037,19 +1037,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
{
struct ap_matrix_mdev *m;
- mutex_lock(&matrix_dev->lock);
-
list_for_each_entry(m, &matrix_dev->mdev_list, node) {
- if ((m != matrix_mdev) && (m->kvm == kvm)) {
- mutex_unlock(&matrix_dev->lock);
+ if ((m != matrix_mdev) && (m->kvm == kvm))
return -EPERM;
- }
}
matrix_mdev->kvm = kvm;
kvm_get_kvm(kvm);
kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
- mutex_unlock(&matrix_dev->lock);
return 0;
}
@@ -1083,35 +1078,52 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
return NOTIFY_DONE;
}
+static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
+{
+ kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
+ matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
+ vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
+ kvm_put_kvm(matrix_mdev->kvm);
+ matrix_mdev->kvm = NULL;
+}
+
static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
- int ret;
+ int ret, notify_rc = NOTIFY_OK;
struct ap_matrix_mdev *matrix_mdev;
if (action != VFIO_GROUP_NOTIFY_SET_KVM)
return NOTIFY_OK;
matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
+ mutex_lock(&matrix_dev->lock);
if (!data) {
- matrix_mdev->kvm = NULL;
- return NOTIFY_OK;
+ if (matrix_mdev->kvm)
+ vfio_ap_mdev_unset_kvm(matrix_mdev);
+ goto notify_done;
}
ret = vfio_ap_mdev_set_kvm(matrix_mdev, data);
- if (ret)
- return NOTIFY_DONE;
+ if (ret) {
+ notify_rc = NOTIFY_DONE;
+ goto notify_done;
+ }
/* If there is no CRYCB pointer, then we can't copy the masks */
- if (!matrix_mdev->kvm->arch.crypto.crycbd)
- return NOTIFY_DONE;
+ if (!matrix_mdev->kvm->arch.crypto.crycbd) {
+ notify_rc = NOTIFY_DONE;
+ goto notify_done;
+ }
kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm,
matrix_mdev->matrix.aqm,
matrix_mdev->matrix.adm);
- return NOTIFY_OK;
+notify_done:
+ mutex_unlock(&matrix_dev->lock);
+ return notify_rc;
}
static void vfio_ap_irq_disable_apqn(int apqn)
@@ -1222,13 +1234,8 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
mutex_lock(&matrix_dev->lock);
- if (matrix_mdev->kvm) {
- kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
- matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
- vfio_ap_mdev_reset_queues(mdev);
- kvm_put_kvm(matrix_mdev->kvm);
- matrix_mdev->kvm = NULL;
- }
+ if (matrix_mdev->kvm)
+ vfio_ap_mdev_unset_kvm(matrix_mdev);
mutex_unlock(&matrix_dev->lock);
vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
--
2.21.1
The accelerated, instruction-based implementations of SHA1, SHA2 and
SHA3 are autoloaded based on CPU capabilities: the code is modest in
size and widely used, so resolving the algorithm name, loading all
compatible modules and picking the one with the highest priority is
considered suboptimal.
However, if these algorithms are requested before this CPU-feature
based matching and autoloading occurs, these modules are not even
considered, and we end up with suboptimal performance.
So add the missing module aliases for the various SHA implementations.
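For context, a sketch of the consumer side (a hypothetical demo module,
not part of this patch), showing how a by-name request is resolved once
the aliases exist:

#include <linux/module.h>
#include <crypto/hash.h>

static int __init sha_alias_demo_init(void)
{
	/* If no "sha256" implementation is registered yet, the crypto API
	 * issues a request_module() for the name; the new
	 * MODULE_ALIAS_CRYPTO("sha256") lets modprobe resolve that request
	 * to sha2-ce on capable CPUs. */
	struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	pr_info("sha256 provided by %s\n",
		crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm)));
	crypto_free_shash(tfm);
	return 0;
}

static void __exit sha_alias_demo_exit(void) { }

module_init(sha_alias_demo_init);
module_exit(sha_alias_demo_exit);
MODULE_LICENSE("GPL");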
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Ard Biesheuvel <ardb(a)kernel.org>
---
arch/arm64/crypto/sha1-ce-glue.c | 1 +
arch/arm64/crypto/sha2-ce-glue.c | 2 ++
arch/arm64/crypto/sha3-ce-glue.c | 4 ++++
arch/arm64/crypto/sha512-ce-glue.c | 2 ++
4 files changed, 9 insertions(+)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index c93121bcfdeb..c1362861765f 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -19,6 +19,7 @@
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel(a)linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha1");
struct sha1_ce_state {
struct sha1_state sst;
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 31ba3da5e61b..ded3a6488f81 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -19,6 +19,8 @@
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel(a)linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha256");
struct sha256_ce_state {
struct sha256_state sst;
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index e5a2936f0886..7288d3046354 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -23,6 +23,10 @@
MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel(a)linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha3-224");
+MODULE_ALIAS_CRYPTO("sha3-256");
+MODULE_ALIAS_CRYPTO("sha3-384");
+MODULE_ALIAS_CRYPTO("sha3-512");
asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks,
int md_len);
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index faa83f6cf376..a6b1adf31c56 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -23,6 +23,8 @@
MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel(a)linaro.org>");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha384");
+MODULE_ALIAS_CRYPTO("sha512");
asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
int blocks);
--
2.17.1
The function ovl_dir_real_file() currently uses the inode lock to
synchronize writes to the upperfile cache field.
However, this function can be called from ovl_ioctl_set_flags(), which
already holds the inode lock. In that case ovl_dir_real_file() tries to
acquire a lock held by a caller in its own call stack, a lock that will
not be released until ovl_dir_real_file() itself returns, so the task
deadlocks.
Define a dedicated mutex for the upperfile cache, so that the deadlock
cannot happen.
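The deadlock shape, as a minimal userspace sketch (hypothetical names; a
non-recursive pthread mutex stands in for the inode lock, and on glibc
relocking it self-deadlocks):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t inode_lock = PTHREAD_MUTEX_INITIALIZER;

static void ovl_dir_real_file_sketch(void)
{
	pthread_mutex_lock(&inode_lock);	/* blocks forever: our caller holds it */
	/* ... populate the upperfile cache ... */
	pthread_mutex_unlock(&inode_lock);
}

static void ovl_ioctl_set_flags_sketch(void)
{
	pthread_mutex_lock(&inode_lock);	/* first acquisition */
	ovl_dir_real_file_sketch();		/* second acquisition: deadlock */
	pthread_mutex_unlock(&inode_lock);
}

int main(void)
{
	ovl_ioctl_set_flags_sketch();	/* never returns */
	puts("unreachable");
	return 0;
}

A dedicated mutex with a narrower scope breaks the self-dependency,
since ovl_ioctl_set_flags() never takes the upperfile mutex itself.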
Fixes: 61536bed2149 ("ovl: support [S|G]ETFLAGS and FS[S|G]ETXATTR ioctls for directories")
Cc: stable(a)vger.kernel.org # v5.10
Signed-off-by: Icenowy Zheng <icenowy(a)aosc.io>
---
Changes in v2:
- Fixed missing replacement in error handling path.
Changes in v3:
- Use mutex instead of semaphore.
fs/overlayfs/readdir.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 01620ebae1bd..3980f9982f34 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -56,6 +56,7 @@ struct ovl_dir_file {
struct list_head *cursor;
struct file *realfile;
struct file *upperfile;
+ struct mutex upperfile_mutex;
};
static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
@@ -874,8 +875,6 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
* Need to check if we started out being a lower dir, but got copied up
*/
if (!od->is_upper) {
- struct inode *inode = file_inode(file);
-
realfile = READ_ONCE(od->upperfile);
if (!realfile) {
struct path upperpath;
@@ -883,10 +882,10 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
ovl_path_upper(dentry, &upperpath);
realfile = ovl_dir_open_realfile(file, &upperpath);
- inode_lock(inode);
+ mutex_lock(&od->upperfile_mutex);
if (!od->upperfile) {
if (IS_ERR(realfile)) {
- inode_unlock(inode);
+ mutex_unlock(&od->upperfile_mutex);
return realfile;
}
smp_store_release(&od->upperfile, realfile);
@@ -896,7 +895,7 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
fput(realfile);
realfile = od->upperfile;
}
- inode_unlock(inode);
+ mutex_unlock(&od->upperfile_mutex);
}
}
@@ -959,6 +958,7 @@ static int ovl_dir_open(struct inode *inode, struct file *file)
od->realfile = realfile;
od->is_real = ovl_dir_is_real(file->f_path.dentry);
od->is_upper = OVL_TYPE_UPPER(type);
+ mutex_init(&od->upperfile_mutex);
file->private_data = od;
return 0;
--
2.28.0
On Fri, Nov 20, 2020 at 4:28 AM Saeed Mirzamohammadi
<saeed.mirzamohammadi(a)oracle.com> wrote:
>
> Hi,
>
> And I think crashkernel=auto could be used as an indicator that user
> want the kernel to control the crashkernel size, so some further work
> could be done to adjust the crashkernel size accordingly, e.g. when
> memory encryption is enabled, increase the crashkernel value for the
> auto estimation, as it's known to consume more crashkernel memory.
>
> Thanks for the suggestion! I tried to keep it simple and leave it to the user to change Kconfig in case a different range is needed. Based on experience, these ranges work well for most of the regular cases.
Yes, I think the current implementation is a very good start.
There are some use cases where the kernel is expected to reserve more
memory, like:
- when memory encryption is enabled, an extra swiotlb-sized amount of
memory should be reserved
- on ppc, fadump will expect more memory to be reserved
I believe there are a lot more cases like these.
I tried to come up with some patches to let the kernel reserve more
memory automatically when such conditions are detected, but changing
the value the user specified with crashkernel= is really weird.
But if we have crashkernel=auto, then having the kernel automatically
reserve more memory will make sense.
> But why not make it arch-independent? This crashkernel=auto idea
> should simply work with every arch.
>
>
> Thanks! I’ll be making it arch-independent in the v2 patch.
>
>
> #include <asm/page.h>
> #include <asm/sections.h>
> @@ -41,6 +42,15 @@ static int __init parse_crashkernel_mem(char *cmdline,
> unsigned long long *crash_base)
> {
> char *cur = cmdline, *tmp;
> + unsigned long long total_mem = system_ram;
> +
> + /*
> + * Firmware sometimes reserves some memory regions for its own use,
> + * so we get less than the actual system memory size.
> + * Work around this by rounding the total size up to 128M, which is
> + * enough for most test cases.
> + */
> + total_mem = roundup(total_mem, SZ_128M);
>
>
> I think this rounding may be better moved to the arch-specific part
> where parse_crashkernel() is called?
>
>
> Thanks for the suggestion. Could you please elaborate why do we need to do that?
Every arch gets its total memory value using a different method
(just check every parse_crashkernel() call; the system_ram param is
filled in many different ways), so I'm really not sure this
rounding is always suitable.
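For reference, the quoted roundup() has the usual round-up-to-a-multiple
semantics; a userspace sketch, with SZ_128M and roundup() redefined
locally to mirror the kernel macros:

#include <stdio.h>

#define SZ_128M		(128ULL << 20)
#define roundup(x, y)	((((x) + (y) - 1) / (y)) * (y))	/* kernel semantics */

int main(void)
{
	/* e.g. firmware reserved ~100M of a nominal 16G machine, so the
	 * kernel sees slightly less than the full size */
	unsigned long long system_ram = (16ULL << 30) - (100ULL << 20);

	printf("%lluM -> %lluM\n", system_ram >> 20,
	       roundup(system_ram, SZ_128M) >> 20);	/* 16284M -> 16384M */
	return 0;
}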
>
> Thanks,
> Saeed
>
>
--
Best Regards,
Kairui Song