Hi,

This patch series avoids problems that occur when a plock op with the
DLM_PLOCK_FL_CLOSE flag set receives a reply, which should never be
the case. The problem becomes more serious when a new plock op is
introduced for which no answer is expected.

In v2 I changed the stable fix to check on the DLM_PLOCK_FL_CLOSE
flag, as this can also be used to fix the potential issue on older
kernels and does not change the UAPI. For newer user space
applications, the new DLM_PLOCK_FL_NO_REPLY flag tells user space
never to send a result back; the filtering is then handled earlier,
in user space (a minimal sketch of that side follows below). For
older user space software we filter the result in the kernel. This
requires that the flags are the same in the request and the reply,
which is the case for dlm_controld.

Also fix the wrapped string and avoid spamming the user when ignoring
replies.
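For reference, a minimal sketch of the user-space side of this
contract (hypothetical code, not actual dlm_controld; it assumes a
file descriptor open on the dlm plock misc device and the
DLM_PLOCK_FL_NO_REPLY flag introduced in patch 2):

#include <unistd.h>
#include <linux/dlm_plock.h>

/* Hypothetical user-space handler, not dlm_controld code. */
static void handle_plock_op(int fd)
{
	struct dlm_plock_info info;

	if (read(fd, &info, sizeof(info)) != sizeof(info))
		return;

	/* ... perform the requested plock operation, set info.rv ... */

	/* The kernel is not waiting for a result for this op: never
	 * write one back, not even to report an error. */
	if (info.flags & DLM_PLOCK_FL_NO_REPLY)
		return;

	/* The reply must carry the same flags as the request. */
	write(fd, &info, sizeof(info));
}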
- Alex
Alexander Aring (3):
fs: dlm: ignore DLM_PLOCK_FL_CLOSE flag results
fs: dlm: introduce DLM_PLOCK_FL_NO_REPLY flag
fs: dlm: allow to F_SETLKW getting interrupted
fs/dlm/plock.c | 107 ++++++++++++++++++++++++---------
include/uapi/linux/dlm_plock.h | 2 +
2 files changed, 81 insertions(+), 28 deletions(-)
--
2.31.1
This patch introduces a new flag, DLM_PLOCK_FL_NO_REPLY, for the case
where a dlm plock operation should not send a reply back. Currently
this is partially handled via DLM_PLOCK_FL_CLOSE, but that flag has an
additional meaning: it removes all waiters for a specific nodeid/owner
pair by doing an unlock operation. If an error occurs in dlm user
space software, e.g. dlm_controld, we get a reply back carrying the
error. This reply cannot be matched because there is no corresponding
op in recv_list (see the sketch below). We now filter on
DLM_PLOCK_FL_NO_REPLY when such an error reply comes back. Newer
dlm_controld versions will never send a result back when
DLM_PLOCK_FL_NO_REPLY is set; the in-kernel filter is a workaround for
older dlm_controld versions.
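To show why such a reply is a problem, here is a simplified sketch of
the matching step in dev_write() (abbreviated from fs/dlm/plock.c;
locking and error reporting are reduced to the essentials):

/* Ops sent with DLM_PLOCK_FL_NO_REPLY are deleted from send_list in
 * dev_read() instead of being moved to recv_list, so a stray reply
 * to such an op can never find a match here. */
struct plock_op *op = NULL, *iter;

spin_lock(&ops_lock);
list_for_each_entry(iter, &recv_list, list) {
	if (iter->info.fsid == info.fsid &&
	    iter->info.number == info.number &&
	    iter->info.owner == info.owner) {
		op = iter;
		list_del_init(&op->list);
		break;
	}
}
spin_unlock(&ops_lock);

if (!op)
	return -EINVAL;	/* unmatched reply; the real code also logs it */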
Fixes: 901025d2f319 ("dlm: make plock operation killable")
Cc: stable@vger.kernel.org
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
fs/dlm/plock.c | 23 +++++++++++++++++++----
include/uapi/linux/dlm_plock.h | 1 +
2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 70a4752ed913..7fe9f4b922d3 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -96,7 +96,7 @@ static void do_unlock_close(const struct dlm_plock_info *info)
op->info.end = OFFSET_MAX;
op->info.owner = info->owner;
- op->info.flags |= DLM_PLOCK_FL_CLOSE;
+ op->info.flags |= (DLM_PLOCK_FL_CLOSE | DLM_PLOCK_FL_NO_REPLY);
send_op(op);
}
@@ -293,7 +293,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
op->info.owner = (__u64)(long) fl->fl_owner;
if (fl->fl_flags & FL_CLOSE) {
- op->info.flags |= DLM_PLOCK_FL_CLOSE;
+ op->info.flags |= (DLM_PLOCK_FL_CLOSE | DLM_PLOCK_FL_NO_REPLY);
send_op(op);
rv = 0;
goto out;
@@ -392,7 +392,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
spin_lock(&ops_lock);
if (!list_empty(&send_list)) {
op = list_first_entry(&send_list, struct plock_op, list);
- if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+ if (op->info.flags & DLM_PLOCK_FL_NO_REPLY)
list_del(&op->list);
else
list_move_tail(&op->list, &recv_list);
@@ -407,7 +407,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
that were generated by the vfs cleaning up for a close
(the process did not make an unlock call). */
- if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+ if (op->info.flags & DLM_PLOCK_FL_NO_REPLY)
dlm_release_plock_op(op);
if (copy_to_user(u, &info, sizeof(info)))
@@ -433,6 +433,21 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
if (check_version(&info))
return -EINVAL;
+ /* Some old dlm user space software will send replies back,
+ * even if DLM_PLOCK_FL_NO_REPLY is set (because the flag is
+ * new), e.g. if an error occurs. We can't match them in
+ * recv_list because they were never part of it. We filter
+ * them here; new dlm user space software filters them in
+ * user space.
+ *
+ * In the future this handling can be removed.
+ */
+ if (info.flags & DLM_PLOCK_FL_NO_REPLY) {
+ pr_info_once("Received unexpected reply for op %d, please update DLM user space software!\n",
+ info.optype);
+ return count;
+ }
+
/*
* The results for waiting ops (SETLKW) can be returned in any
* order, so match all fields to find the op. The results for
diff --git a/include/uapi/linux/dlm_plock.h b/include/uapi/linux/dlm_plock.h
index 63b6c1fd9169..8dfa272c929a 100644
--- a/include/uapi/linux/dlm_plock.h
+++ b/include/uapi/linux/dlm_plock.h
@@ -25,6 +25,7 @@ enum {
};
#define DLM_PLOCK_FL_CLOSE 1
+#define DLM_PLOCK_FL_NO_REPLY 2
struct dlm_plock_info {
__u32 version[3];
--
2.31.1
Commit 6018b585e8c6 ("tracing/histograms: Add histograms to hist_vars if
they have referenced variables") added a check to fail histogram creation
if save_hist_vars() failed to add the histogram to the hist_vars list.
But the commit did not set ret to the returned error code before jumping
to unregister the histogram. Fix it (the sketch below shows the general
pattern).
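The pattern, as a generic C sketch (helper() is a hypothetical
stand-in, not kernel code):

int ret = 0;

/* Buggy: the helper's error code is tested but never stored, so the
 * caller unwinds through the error path yet still returns 0. */
if (helper())
	goto out_unreg;

/* Fixed: store the code first, then test it. */
ret = helper();
if (ret)
	goto out_unreg;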
Cc: stable@vger.kernel.org
Fixes: 6018b585e8c6 ("tracing/histograms: Add histograms to hist_vars if they have referenced variables")
Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
---
kernel/trace/trace_events_hist.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index c8c61381eba4..d06938ae0717 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -6668,7 +6668,8 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
goto out_unreg;
if (has_hist_vars(hist_data) || hist_data->n_var_refs) {
- if (save_hist_vars(hist_data))
+ ret = save_hist_vars(hist_data);
+ if (ret)
goto out_unreg;
}
--
2.34.1
The patch titled
Subject: lib/test_meminit: allocate pages up to order MAX_ORDER
has been added to the -mm mm-unstable branch. Its filename is
lib-test_meminit-allocate-pages-up-to-order-max_order.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Andrew Donnellan <ajd@linux.ibm.com>
Subject: lib/test_meminit: allocate pages up to order MAX_ORDER
Date: Fri, 14 Jul 2023 11:52:38 +1000
test_pages() tests the page allocator by calling alloc_pages() with
different orders up to order 10.
However, different architectures and platforms support different maximum
contiguous allocation sizes. The default maximum allocation order
(MAX_ORDER) is 10, but architectures can use CONFIG_ARCH_FORCE_MAX_ORDER
to override this. On platforms where this is less than 10, test_meminit()
will blow up with a WARN(). This is expected, so let's not do that.
Replace the hardcoded "10" with the MAX_ORDER macro so that we test
allocations up to the expected platform limit.
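For illustration, a minimal sketch of the boundary being respected
(kernel C; the assumption, per the page allocator, is that
alloc_pages() WARNs and fails for order > MAX_ORDER):

#include <linux/gfp.h>

static void alloc_all_orders(void)
{
	struct page *page;
	int order;

	/* MAX_ORDER is the largest order we may request. */
	for (order = 0; order <= MAX_ORDER; order++) {
		page = alloc_pages(GFP_KERNEL, order);
		if (page)
			__free_pages(page, order);
	}
}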
Link: https://lkml.kernel.org/r/20230714015238.47931-1-ajd@linux.ibm.com
Fixes: 5015a300a522 ("lib: introduce test_meminit module")
Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Cc: Xiaoke Wang <xkernel.wang@foxmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
lib/test_meminit.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/lib/test_meminit.c~lib-test_meminit-allocate-pages-up-to-order-max_order
+++ a/lib/test_meminit.c
@@ -93,7 +93,7 @@ static int __init test_pages(int *total_
int failures = 0, num_tests = 0;
int i;
- for (i = 0; i < 10; i++)
+ for (i = 0; i <= MAX_ORDER; i++)
num_tests += do_alloc_pages_order(i, &failures);
REPORT_FAILURES_IN_FN();
_
Patches currently in -mm which might be from ajd@linux.ibm.com are
lib-test_meminit-allocate-pages-up-to-order-max_order.patch
Xiang reports that VMs occasionally fail to boot on GICv4.1 systems when
running a preemptible kernel, as it is possible that a vCPU is blocked
without requesting a doorbell interrupt.
The issue is that any preemption that occurs between vgic_v4_put() and
schedule() on the block path will mark the vPE as nonresident and *not*
request a doorbell irq. This occurs because when the vcpu thread is
resumed on its way to block, vcpu_load() will make the vPE resident
again. Once the vcpu actually blocks, we don't request a doorbell
anymore, and the vcpu won't be woken up on interrupt delivery.
Fix it by tracking that we're entering WFI, and key the doorbell
request on that flag. This allows us not to make the vPE resident
when going through a preempt/schedule cycle, meaning we don't lose
any state.
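For clarity, a simplified sketch of the broken block path (not the
exact code; the comments mark where the preemption bites):

	/* kvm_vcpu_wfi(), before this fix */
	preempt_disable();
	kvm_vgic_vmcr_sync(vcpu);
	vgic_v4_put(vcpu, true);	/* vPE non-resident, doorbell requested */
	preempt_enable();

	/*
	 * Preemption here reschedules the vcpu thread: vcpu_load()
	 * makes the vPE resident again, dropping the doorbell request.
	 */

	kvm_vcpu_halt(vcpu);	/* schedule() ends in vcpu_put() ->
				 * vgic_v4_put(vcpu, false): non-resident
				 * again, but now without a doorbell, so
				 * interrupt delivery cannot wake the vcpu */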
Cc: stable@vger.kernel.org
Fixes: 8e01d9a396e6 ("KVM: arm64: vgic-v4: Move the GICv4 residency flow to be driven by vcpu_load/put")
Reported-by: Xiang Chen <chenxiang66@hisilicon.com>
Suggested-by: Zenghui Yu <yuzenghui@huawei.com>
Tested-by: Xiang Chen <chenxiang66@hisilicon.com>
Co-developed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
arch/arm64/include/asm/kvm_host.h | 2 ++
arch/arm64/kvm/arm.c | 6 ++++--
arch/arm64/kvm/vgic/vgic-v3.c | 2 +-
arch/arm64/kvm/vgic/vgic-v4.c | 7 +++++--
include/kvm/arm_vgic.h | 2 +-
5 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 1e768481f62f..914fc9c26e40 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -817,6 +817,8 @@ struct kvm_vcpu_arch {
#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
/* PMUSERENR for the guest EL0 is on physical CPU */
#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
+/* WFI instruction trapped */
+#define IN_WFI __vcpu_single_flag(sflags, BIT(7))
/* vcpu entered with HCR_EL2.E2H set */
#define VCPU_HCR_E2H __vcpu_single_flag(oflags, BIT(0))
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 236c5f1c9090..cf208d30a9ea 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -725,13 +725,15 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
- vgic_v4_put(vcpu, true);
+ vcpu_set_flag(vcpu, IN_WFI);
+ vgic_v4_put(vcpu);
preempt_enable();
kvm_vcpu_halt(vcpu);
vcpu_clear_flag(vcpu, IN_WFIT);
preempt_disable();
+ vcpu_clear_flag(vcpu, IN_WFI);
vgic_v4_load(vcpu);
preempt_enable();
}
@@ -799,7 +801,7 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
/* The distributor enable bits were changed */
preempt_disable();
- vgic_v4_put(vcpu, false);
+ vgic_v4_put(vcpu);
vgic_v4_load(vcpu);
preempt_enable();
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 49d35618d576..df61ead7c757 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -780,7 +780,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
* done a vgic_v4_put) and when running a nested guest (the
* vPE was never resident in order to generate a doorbell).
*/
- WARN_ON(vgic_v4_put(vcpu, false));
+ WARN_ON(vgic_v4_put(vcpu));
vgic_v3_vmcr_sync(vcpu);
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index c1c28fe680ba..339a55194b2c 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -336,14 +336,14 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->vpes = NULL;
}
-int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
+int vgic_v4_put(struct kvm_vcpu *vcpu)
{
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
return 0;
- return its_make_vpe_non_resident(vpe, need_db);
+ return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI));
}
int vgic_v4_load(struct kvm_vcpu *vcpu)
@@ -354,6 +354,9 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
return 0;
+ if (vcpu_get_flag(vcpu, IN_WFI))
+ return 0;
+
/*
* Before making the VPE resident, make sure the redistributor
* corresponding to our current CPU expects us here. See the
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 9b91a8135dac..765d801d1ddc 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -446,7 +446,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
int vgic_v4_load(struct kvm_vcpu *vcpu);
void vgic_v4_commit(struct kvm_vcpu *vcpu);
-int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
+int vgic_v4_put(struct kvm_vcpu *vcpu);
bool vgic_state_is_nested(struct kvm_vcpu *vcpu);
--
2.34.1