The patch titled
Subject: kernel: make groups_sort calling a responsibility group_info allocators
has been added to the -mm tree. Its filename is
kernel-make-groups_sort-calling-a-responsibility-group_info-allocators.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/kernel-make-groups_sort-calling-a-…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/kernel-make-groups_sort-calling-a-…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/SubmitChecklist when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Thiago Rafael Becker <thiago.becker(a)gmail.com>
Subject: kernel: make groups_sort calling a responsibility group_info allocators
In testing, we found that nfsd threads may call set_groups in parallel for
the same entry cached in auth.unix.gid, racing in the call of groups_sort,
corrupting the groups for that entry and leading to permission denials for
the client.
This patch:
- Make groups_sort globally visible.
- Move the call to groups_sort to the modifiers of group_info
- Remove the call to groups_sort from set_groups
Link: http://lkml.kernel.org/r/20171211151420.18655-1-thiago.becker@gmail.com
Signed-off-by: Thiago Rafael Becker <thiago.becker(a)gmail.com>
Reviewed-by: Matthew Wilcox <mawilcox(a)microsoft.com>
Reviewed-by: NeilBrown <neilb(a)suse.com>
Acked-by: "J. Bruce Fields" <bfields(a)fieldses.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Martin Schwidefsky <schwidefsky(a)de.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/s390/kernel/compat_linux.c | 1 +
fs/nfsd/auth.c | 3 +++
include/linux/cred.h | 1 +
kernel/groups.c | 5 +++--
kernel/uid16.c | 1 +
net/sunrpc/auth_gss/gss_rpc_xdr.c | 1 +
net/sunrpc/auth_gss/svcauth_gss.c | 1 +
net/sunrpc/svcauth_unix.c | 2 ++
8 files changed, 13 insertions(+), 2 deletions(-)
diff -puN arch/s390/kernel/compat_linux.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators arch/s390/kernel/compat_linux.c
--- a/arch/s390/kernel/compat_linux.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators
+++ a/arch/s390/kernel/compat_linux.c
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16,
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff -puN fs/nfsd/auth.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators fs/nfsd/auth.c
--- a/fs/nfsd/auth.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators
+++ a/fs/nfsd/auth.c
@@ -60,6 +60,9 @@ int nfsd_setuser(struct svc_rqst *rqstp,
gi->gid[i] = exp->ex_anon_gid;
else
gi->gid[i] = rqgi->gid[i];
+
+ /* Each thread allocates its own gi, no race */
+ groups_sort(gi);
}
} else {
gi = get_group_info(rqgi);
diff -puN include/linux/cred.h~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators include/linux/cred.h
--- a/include/linux/cred.h~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators
+++ a/include/linux/cred.h
@@ -83,6 +83,7 @@ extern int set_current_groups(struct gro
extern void set_groups(struct cred *, struct group_info *);
extern int groups_search(const struct group_info *, kgid_t);
extern bool may_setgroups(void);
+extern void groups_sort(struct group_info *);
/*
* The security context of a task
diff -puN kernel/groups.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators kernel/groups.c
--- a/kernel/groups.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators
+++ a/kernel/groups.c
@@ -86,11 +86,12 @@ static int gid_cmp(const void *_a, const
return gid_gt(a, b) - gid_lt(a, b);
}
-static void groups_sort(struct group_info *group_info)
+void groups_sort(struct group_info *group_info)
{
sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid),
gid_cmp, NULL);
}
+EXPORT_SYMBOL(groups_sort);
/* a simple bsearch */
int groups_search(const struct group_info *group_info, kgid_t grp)
@@ -122,7 +123,6 @@ int groups_search(const struct group_inf
void set_groups(struct cred *new, struct group_info *group_info)
{
put_group_info(new->group_info);
- groups_sort(group_info);
get_group_info(group_info);
new->group_info = group_info;
}
@@ -206,6 +206,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsi
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff -puN kernel/uid16.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators kernel/uid16.c
--- a/kernel/uid16.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators
+++ a/kernel/uid16.c
@@ -192,6 +192,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidset
return retval;
}
+ groups_sort(group_info);
retval = set_current_groups(group_info);
put_group_info(group_info);
diff -puN net/sunrpc/auth_gss/gss_rpc_xdr.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators net/sunrpc/auth_gss/gss_rpc_xdr.c
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators
+++ a/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -231,6 +231,7 @@ static int gssx_dec_linux_creds(struct x
goto out_free_groups;
creds->cr_group_info->gid[i] = kgid;
}
+ groups_sort(creds->cr_group_info);
return 0;
out_free_groups:
diff -puN net/sunrpc/auth_gss/svcauth_gss.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators net/sunrpc/auth_gss/svcauth_gss.c
--- a/net/sunrpc/auth_gss/svcauth_gss.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators
+++ a/net/sunrpc/auth_gss/svcauth_gss.c
@@ -481,6 +481,7 @@ static int rsc_parse(struct cache_detail
goto out;
rsci.cred.cr_group_info->gid[i] = kgid;
}
+ groups_sort(rsci.cred.cr_group_info);
/* mech name */
len = qword_get(&mesg, buf, mlen);
diff -puN net/sunrpc/svcauth_unix.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators net/sunrpc/svcauth_unix.c
--- a/net/sunrpc/svcauth_unix.c~kernel-make-groups_sort-calling-a-responsibility-group_info-allocators
+++ a/net/sunrpc/svcauth_unix.c
@@ -520,6 +520,7 @@ static int unix_gid_parse(struct cache_d
ug.gi->gid[i] = kgid;
}
+ groups_sort(ug.gi);
ugp = unix_gid_lookup(cd, uid);
if (ugp) {
struct cache_head *ch;
@@ -819,6 +820,7 @@ svcauth_unix_accept(struct svc_rqst *rqs
kgid_t kgid = make_kgid(&init_user_ns, svc_getnl(argv));
cred->cr_group_info->gid[i] = kgid;
}
+ groups_sort(cred->cr_group_info);
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
_
Patches currently in -mm which might be from thiago.becker(a)gmail.com are
kernel-make-groups_sort-calling-a-responsibility-group_info-allocators.patch
Hi Eric,
A kernel bug report was opened against Ubuntu [0]. It was found that
reverting the following commit resolved this bug:
commit b2504a5dbef3305ef41988ad270b0e8ec289331c
Author: Eric Dumazet <edumazet(a)google.com>
Date: Tue Jan 31 10:20:32 2017 -0800
net: reduce skb_warn_bad_offload() noise
The regression was introduced as of v4.11-rc1 and still exists in
current mainline.
I was hoping to get your feedback, since you are the patch author. Do
you think gathering any additional data will help diagnose this issue,
or would it be best to submit a revert request?
This commit did in fact resolve another bug[1], but in the process
introduced this regression.
Thanks,
Joe
[0] http://pad.lv/1715609
[1] http://pad.lv/1705447
Hi,
This series corrects a number of issues with NT_PRFPREG regset, most
importantly an FCSR access API regression introduced with the addition of
MSA support, and then a few smaller issues with the get/set handlers.
I have decided to factor out non-MSA and MSA context helpers as the first
step to avoid the issue with excessive indentation that would inevitably
happen if the regression fix was applied to current code as it stands.
It shouldn't be a big deal with backporting as this code hasn't changed
much since the regression, and it will make any future bacports easier.
Only a call to `init_fp_ctx' will have to be trivially resolved (though
arguably commit ac9ad83bc318 ("MIPS: prevent FP context set via ptrace
being discarded"), which has added `init_fp_ctx', would be good to
backport as far as possible instead).
These changes have been verified by examining the register state recorded
in core dumps manually with GDB, as well as by running the GDB test suite.
No user of ptrace(2) PTRACE_GETREGSET and PTRACE_SETREGSET requests is
known for the MIPS port, so this part remains not covered, however it is
assumed to remain consistent with how the creation of core file works.
See individual patch descriptions for further details, and for changes
made since v1 to address concerns raised in the review.
Maciej
Hi,
On Tue, Dec 12, 2017 at 3:06 AM, Felipe Balbi <balbi(a)kernel.org> wrote:
>
> Hi,
>
> Douglas Anderson <dianders(a)chromium.org> writes:
>> On rk3288-veyron devices on Chrome OS it was found that plugging in an
>> Arduino-based USB device could cause the system to lockup, especially
>> if the CPU Frequency was at one of the slower operating points (like
>> 100 MHz / 200 MHz).
>>
>> Upon tracing, I found that the following was happening:
>> * The USB device (full speed) was connected to a high speed hub and
>> then to the rk3288. Thus, we were dealing with split transactions,
>> which is all handled in software on dwc2.
>> * Userspace was initiating a BULK IN transfer
>> * When we sent the SSPLIT (to start the split transaction), we got an
>> ACK. Good. Then we issued the CSPLIT.
>> * When we sent the CSPLIT, we got back a NAK. We immediately (from
>> the interrupt handler) started to retry and sent another SSPLIT.
>> * The device kept NAKing our CSPLIT, so we kept ping-ponging between
>> sending a SSPLIT and a CSPLIT, each time sending from the interrupt
>> handler.
>> * The handling of the interrupts was (because of the low CPU speed and
>> the inefficiency of the dwc2 interrupt handler) was actually taking
>> _longer_ than it took the other side to send the ACK/NAK. Thus we
>> were _always_ in the USB interrupt routine.
>> * The fact that USB interrupts were always going off was preventing
>> other things from happening in the system. This included preventing
>> the system from being able to transition to a higher CPU frequency.
>>
>> As I understand it, there is no requirement to retry super quickly
>> after a NAK, we just have to retry sometime in the future. Thus one
>> solution to the above is to just add a delay between getting a NAK and
>> retrying the transmission. If this delay is sufficiently long to get
>> out of the interrupt routine then the rest of the system will be able
>> to make forward progress. Even a 25 us delay would probably be
>> enough, but we'll be extra conservative and try to delay 1 ms (the
>> exact amount depends on HZ and the accuracy of the jiffy and how close
>> the current jiffy is to ticking, but could be as much as 20 ms or as
>> little as 1 ms).
>>
>> Presumably adding a delay like this could impact the USB throughput,
>> so we only add the delay with repeated NAKs.
>>
>> NOTE: Upon further testing of a pl2303 serial adapter, I found that
>> this fix may help with problems there. Specifically I found that the
>> pl2303 serial adapters tend to respond with a NAK when they have
>> nothing to say and thus we end with this same sequence.
>>
>> Signed-off-by: Douglas Anderson <dianders(a)chromium.org>
>> Cc: stable(a)vger.kernel.org
>> Reviewed-by: Julius Werner <jwerner(a)chromium.org>
>> Tested-by: Stefan Wahren <stefan.wahren(a)i2se.com>
>
> This seems too big for -rc or -stable inclusion.
I've removed the stable tag at your request. I originally added it at
your request in response to v2 of this patch. I'd agree that it's a
pretty big patch and therefore "risky" to pick back to stable. ...but
it does fix a bug reported by several people on the mailing lists, so
I'll leave it to your discretion. Previously in relation to the
stable tag, I had mentioned:
It's a little weird since it doesn't "fix" any specific
commit, so I guess it will be up to stable folks to decide how far to
go back. The dwc2 devices I work with are actually on 3.14, but we
have some pretty massive backports related to dwc2 there...
> In any case, this
> doesn't apply to my testing/next branch. Care to rebase and collect acks
> you received while doing that?
Sure, no problem. I've posted v4 with John Youn's Ack. The reason v3
didn't apply is that you've now got commit e99e88a9d2b0 ("treewide:
setup_timer() -> timer_setup()"). Originally my plan was to beat that
patch into the kernel and then I'd do the timer conversion myself.
That was patch #2 in the v3 series, AKA
<https://patchwork.kernel.org/patch/10032935/>. ...but since I failed
to beat Kees' patch in, I've now squashed patches #1 and #2 together
and resolved the trivial conflict.
If anyone were thinking of trying to backport this patch to older
kernels (where they presumably don't have Kees's timer patch) they can
always use the v3 patch posted here as a reference for how to make
things work. ;)
-Doug
Using PGDIR_SHIFT to identify espfix64 addresses on 5-level systems
was wrong, and it resulted in panics due to unhandled double faults.
Use P4D_SHIFT instead, which is correct on 4-level and 5-level
machines.
This fixes a panic when running x86 selftests on 5-level machines.
Fixes: 1d33b219563f ("x86/espfix: Add support for 5-level paging")
Cc: stable(a)vger.kernel.org
Cc: "Kirill A. Shutemov" <kirill(a)shutemov.name>
Cc: Dave Hansen <dave.hansen(a)intel.com>
Signed-off-by: Andy Lutomirski <luto(a)kernel.org>
---
arch/x86/kernel/traps.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 74136fd16f49..c4e5b0a7516f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -360,7 +360,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
*
* No need for ist_enter here because we don't use RCU.
*/
- if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+ if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
regs->cs == __KERNEL_CS &&
regs->ip == (unsigned long)native_irq_return_iret)
{
--
2.13.6
From: Marc Zyngier <marc.zyngier(a)arm.com>
Commit 5553b142be11e794ebc0805950b2e8313f93d718 upstream.
VTTBR_BADDR_MASK is used to sanity check the size and alignment of the
VTTBR address. It seems to currently be off by one, thereby only
allowing up to 39-bit addresses (instead of 40-bit) and also
insufficiently checking the alignment. This patch fixes it.
This patch is the 32bit pendent of Kristina's arm64 fix, and
she deserves the actual kudos for pinpointing that one.
Fixes: f7ed45be3ba52 ("KVM: ARM: World-switch implementation")
Cc: <stable(a)vger.kernel.org> # 3.9
Reported-by: Kristina Martsenko <kristina.martsenko(a)arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall(a)linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
arch/arm/include/asm/kvm_arm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 816db0bf2dd8..46b336df4ec1 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,8 +161,7 @@
#else
#define VTTBR_X (5 - KVM_T0SZ)
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (48LLU)
#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT)
--
2.14.2
From: Marc Zyngier <marc.zyngier(a)arm.com>
Commit 5553b142be11e794ebc0805950b2e8313f93d718 upstream.
VTTBR_BADDR_MASK is used to sanity check the size and alignment of the
VTTBR address. It seems to currently be off by one, thereby only
allowing up to 39-bit addresses (instead of 40-bit) and also
insufficiently checking the alignment. This patch fixes it.
This patch is the 32bit pendent of Kristina's arm64 fix, and
she deserves the actual kudos for pinpointing that one.
Fixes: f7ed45be3ba52 ("KVM: ARM: World-switch implementation")
Cc: <stable(a)vger.kernel.org> # 3.9
Reported-by: Kristina Martsenko <kristina.martsenko(a)arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall(a)linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
arch/arm/include/asm/kvm_arm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index dc641ddf0784..0f2720713492 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -161,8 +161,7 @@
#else
#define VTTBR_X (5 - KVM_T0SZ)
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (48LLU)
#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT)
--
2.14.2
From: Kristina Martsenko <kristina.martsenko(a)arm.com>
Commit 26aa7b3b1c0fb3f1a6176a0c1847204ef4355693 upstream.
VTTBR_BADDR_MASK is used to sanity check the size and alignment of the
VTTBR address. It seems to currently be off by one, thereby only
allowing up to 47-bit addresses (instead of 48-bit) and also
insufficiently checking the alignment. This patch fixes it.
As an example, with 4k pages, before this patch we have:
PHYS_MASK_SHIFT = 48
VTTBR_X = 37 - 24 = 13
VTTBR_BADDR_SHIFT = 13 - 1 = 12
VTTBR_BADDR_MASK = ((1 << 35) - 1) << 12 = 0x00007ffffffff000
Which is wrong, because the mask doesn't allow bit 47 of the VTTBR
address to be set, and only requires the address to be 12-bit (4k)
aligned, while it actually needs to be 13-bit (8k) aligned because we
concatenate two 4k tables.
With this patch, the mask becomes 0x0000ffffffffe000, which is what we
want.
Fixes: 0369f6a34b9f ("arm64: KVM: EL2 register definitions")
Cc: <stable(a)vger.kernel.org> # 3.11.x
Reviewed-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall(a)linaro.org>
Signed-off-by: Kristina Martsenko <kristina.martsenko(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
arch/arm64/include/asm/kvm_arm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 8afb863f5a9e..333ddd45dd1f 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -160,8 +160,7 @@
#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT)
--
2.14.2
From: Marc Zyngier <marc.zyngier(a)arm.com>
Commit 64afe6e9eb4841f35317da4393de21a047a883b3 upstream.
The current pending table parsing code assumes that we keep the
previous read of the pending bits, but keep that variable in
the current block, making sure it is discarded on each loop.
We end-up using whatever is on the stack. Who knows, it might
just be the right thing...
Fixes: 33d3bc9556a7d ("KVM: arm64: vgic-its: Read initial LPI pending table")
Cc: <stable(a)vger.kernel.org> # 4.8
Reported-by: AKASHI Takahiro <takahiro.akashi(a)linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall(a)linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
virt/kvm/arm/vgic/vgic-its.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 4660a7d04eea..bbd4a988e8c1 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -322,6 +322,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
int ret = 0;
u32 *intids;
int nr_irqs, i;
+ u8 pendmask;
nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids);
if (nr_irqs < 0)
@@ -329,7 +330,6 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
for (i = 0; i < nr_irqs; i++) {
int byte_offset, bit_nr;
- u8 pendmask;
byte_offset = intids[i] / BITS_PER_BYTE;
bit_nr = intids[i] % BITS_PER_BYTE;
--
2.14.2
From: Kristina Martsenko <kristina.martsenko(a)arm.com>
Commit 26aa7b3b1c0fb3f1a6176a0c1847204ef4355693 upstream.
VTTBR_BADDR_MASK is used to sanity check the size and alignment of the
VTTBR address. It seems to currently be off by one, thereby only
allowing up to 47-bit addresses (instead of 48-bit) and also
insufficiently checking the alignment. This patch fixes it.
As an example, with 4k pages, before this patch we have:
PHYS_MASK_SHIFT = 48
VTTBR_X = 37 - 24 = 13
VTTBR_BADDR_SHIFT = 13 - 1 = 12
VTTBR_BADDR_MASK = ((1 << 35) - 1) << 12 = 0x00007ffffffff000
Which is wrong, because the mask doesn't allow bit 47 of the VTTBR
address to be set, and only requires the address to be 12-bit (4k)
aligned, while it actually needs to be 13-bit (8k) aligned because we
concatenate two 4k tables.
With this patch, the mask becomes 0x0000ffffffffe000, which is what we
want.
Fixes: 0369f6a34b9f ("arm64: KVM: EL2 register definitions")
Cc: <stable(a)vger.kernel.org> # 3.11.x
Reviewed-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall(a)linaro.org>
Signed-off-by: Kristina Martsenko <kristina.martsenko(a)arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier(a)arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall(a)linaro.org>
---
arch/arm64/include/asm/kvm_arm.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 2d960f8588b0..ef8e13d379cb 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -164,8 +164,7 @@
#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
#endif
-#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_X)
#define VTTBR_VMID_SHIFT (UL(48))
#define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT)
--
2.14.2
This is a note to let you know that I've just added the patch titled
zram: set physical queue limits to avoid array out of bounds accesses
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
zram-set-physical-queue-limits-to-avoid-array-out-of-bounds-accesses.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Johannes Thumshirn <jthumshirn(a)suse.de>
Date: Mon, 6 Mar 2017 11:23:35 +0100
Subject: zram: set physical queue limits to avoid array out of bounds accesses
From: Johannes Thumshirn <jthumshirn(a)suse.de>
[ Upstream commit 0bc315381fe9ed9fb91db8b0e82171b645ac008f ]
zram can handle at most SECTORS_PER_PAGE sectors in a bio's bvec. When using
the NVMe over Fabrics loopback target which potentially sends a huge bulk of
pages attached to the bio's bvec this results in a kernel panic because of
array out of bounds accesses in zram_decompress_page().
Signed-off-by: Johannes Thumshirn <jthumshirn(a)suse.de>
Reviewed-by: Hannes Reinecke <hare(a)suse.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)fb.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/block/zram/zram_drv.c | 2 ++
1 file changed, 2 insertions(+)
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1247,6 +1247,8 @@ static int zram_add(void)
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
+ zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE;
+ zram->disk->queue->limits.chunk_sectors = 0;
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
/*
* zram_bio_discard() will clear all logical blocks if logical block
Patches currently in stable-queue which might be from jthumshirn(a)suse.de are
queue-4.4/zram-set-physical-queue-limits-to-avoid-array-out-of-bounds-accesses.patch
This is a note to let you know that I've just added the patch titled
xfrm: Copy policy family in clone_policy
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
xfrm-copy-policy-family-in-clone_policy.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Herbert Xu <herbert(a)gondor.apana.org.au>
Date: Fri, 10 Nov 2017 14:14:06 +1100
Subject: xfrm: Copy policy family in clone_policy
From: Herbert Xu <herbert(a)gondor.apana.org.au>
[ Upstream commit 0e74aa1d79a5bbc663e03a2804399cae418a0321 ]
The syzbot found an ancient bug in the IPsec code. When we cloned
a socket policy (for example, for a child TCP socket derived from a
listening socket), we did not copy the family field. This results
in a live policy with a zero family field. This triggers a BUG_ON
check in the af_key code when the cloned policy is retrieved.
This patch fixes it by copying the family field over.
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/xfrm/xfrm_policy.c | 1 +
1 file changed, 1 insertion(+)
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1361,6 +1361,7 @@ static struct xfrm_policy *clone_policy(
newp->xfrm_nr = old->xfrm_nr;
newp->index = old->index;
newp->type = old->type;
+ newp->family = old->family;
memcpy(newp->xfrm_vec, old->xfrm_vec,
newp->xfrm_nr*sizeof(struct xfrm_tmpl));
write_lock_bh(&net->xfrm.xfrm_policy_lock);
Patches currently in stable-queue which might be from herbert(a)gondor.apana.org.au are
queue-4.4/xfrm-copy-policy-family-in-clone_policy.patch
queue-4.4/crypto-s5p-sss-fix-completing-crypto-request-in-irq-handler.patch
This is a note to let you know that I've just added the patch titled
x86/hpet: Prevent might sleep splat on resume
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
x86-hpet-prevent-might-sleep-splat-on-resume.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Thomas Gleixner <tglx(a)linutronix.de>
Date: Wed, 1 Mar 2017 21:10:17 +0100
Subject: x86/hpet: Prevent might sleep splat on resume
From: Thomas Gleixner <tglx(a)linutronix.de>
[ Upstream commit bb1a2c26165640ba2cbcfe06c81e9f9d6db4e643 ]
Sergey reported a might sleep warning triggered from the hpet resume
path. It's caused by the call to disable_irq() from interrupt disabled
context.
The problem with the low level resume code is that it is not accounted as a
special system_state like we do during the boot process. Calling the same
code during system boot would not trigger the warning. That's inconsistent
at best.
In this particular case it's trivial to replace the disable_irq() with
disable_hardirq() because this particular code path is solely used from
system resume and the involved hpet interrupts can never be force threaded.
Reported-and-tested-by: Sergey Senozhatsky <sergey.senozhatsky.work(a)gmail.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: "Rafael J. Wysocki" <rjw(a)sisk.pl>
Cc: Sergey Senozhatsky <sergey.senozhatsky(a)gmail.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703012108460.3684@nanos
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/x86/kernel/hpet.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -353,7 +353,7 @@ static int hpet_resume(struct clock_even
irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
- disable_irq(hdev->irq);
+ disable_hardirq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
enable_irq(hdev->irq);
}
Patches currently in stable-queue which might be from tglx(a)linutronix.de are
queue-4.4/efi-esrt-use-memunmap-instead-of-kfree-to-free-the-remapping.patch
queue-4.4/x86-hpet-prevent-might-sleep-splat-on-resume.patch
queue-4.4/efi-move-some-sysfs-files-to-be-read-only-by-root.patch
queue-4.4/sparc64-mm-set-fields-in-deferred-pages.patch
queue-4.4/jump_label-invoke-jump_label_test-via-early_initcall.patch
queue-4.4/x86-pci-make-broadcom_postcore_init-check-acpi_disabled.patch
This is a note to let you know that I've just added the patch titled
workqueue: trigger WARN if queue_delayed_work() is called with NULL @wq
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
workqueue-trigger-warn-if-queue_delayed_work-is-called-with-null-wq.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Tejun Heo <tj(a)kernel.org>
Date: Mon, 6 Mar 2017 15:33:42 -0500
Subject: workqueue: trigger WARN if queue_delayed_work() is called with NULL @wq
From: Tejun Heo <tj(a)kernel.org>
[ Upstream commit 637fdbae60d6cb9f6e963c1079d7e0445c86ff7d ]
If queue_delayed_work() gets called with NULL @wq, the kernel will
oops asynchronuosly on timer expiration which isn't too helpful in
tracking down the offender. This actually happened with smc.
__queue_delayed_work() already does several input sanity checks
synchronously. Add NULL @wq check.
Reported-by: Dave Jones <davej(a)codemonkey.org.uk>
Link: http://lkml.kernel.org/r/20170227171439.jshx3qplflyrgcv7@codemonkey.org.uk
Signed-off-by: Tejun Heo <tj(a)kernel.org>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
kernel/workqueue.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1479,6 +1479,7 @@ static void __queue_delayed_work(int cpu
struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work;
+ WARN_ON_ONCE(!wq);
WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
timer->data != (unsigned long)dwork);
WARN_ON_ONCE(timer_pending(timer));
Patches currently in stable-queue which might be from tj(a)kernel.org are
queue-4.4/libata-drop-warn-from-protocol-error-in-ata_sff_qc_issue.patch
queue-4.4/workqueue-trigger-warn-if-queue_delayed_work-is-called-with-null-wq.patch
This is a note to let you know that I've just added the patch titled
vti6: Don't report path MTU below IPV6_MIN_MTU.
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
vti6-don-t-report-path-mtu-below-ipv6_min_mtu.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Steffen Klassert <steffen.klassert(a)secunet.com>
Date: Wed, 15 Feb 2017 11:38:58 +0100
Subject: vti6: Don't report path MTU below IPV6_MIN_MTU.
From: Steffen Klassert <steffen.klassert(a)secunet.com>
[ Upstream commit e3dc847a5f85b43ee2bfc8eae407a7e383483228 ]
In vti6_xmit(), the check for IPV6_MIN_MTU before we
send a ICMPV6_PKT_TOOBIG message is missing. So we might
report a PMTU below 1280. Fix this by adding the required
check.
Fixes: ccd740cbc6e ("vti6: Add pmtu handling to vti6_xmit.")
Signed-off-by: Steffen Klassert <steffen.klassert(a)secunet.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv6/ip6_vti.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -474,11 +474,15 @@ vti6_xmit(struct sk_buff *skb, struct ne
if (!skb->ignore_df && skb->len > mtu) {
skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu);
- if (skb->protocol == htons(ETH_P_IPV6))
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- else
+ } else {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
+ }
return -EMSGSIZE;
}
Patches currently in stable-queue which might be from steffen.klassert(a)secunet.com are
queue-4.4/xfrm-copy-policy-family-in-clone_policy.patch
queue-4.4/vti6-don-t-report-path-mtu-below-ipv6_min_mtu.patch
This is a note to let you know that I've just added the patch titled
usb: gadget: configs: plug memory leak
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
usb-gadget-configs-plug-memory-leak.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: John Keeping <john(a)metanate.com>
Date: Tue, 28 Feb 2017 10:55:30 +0000
Subject: usb: gadget: configs: plug memory leak
From: John Keeping <john(a)metanate.com>
[ Upstream commit 38355b2a44776c25b0f2ad466e8c51bb805b3032 ]
When binding a gadget to a device, "name" is stored in gi->udc_name, but
this does not happen when unregistering and the string is leaked.
Signed-off-by: John Keeping <john(a)metanate.com>
Signed-off-by: Felipe Balbi <felipe.balbi(a)linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/gadget/configfs.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -270,6 +270,7 @@ static ssize_t gadget_dev_desc_UDC_store
ret = unregister_gadget(gi);
if (ret)
goto err;
+ kfree(name);
} else {
if (gi->udc_name) {
ret = -EBUSY;
Patches currently in stable-queue which might be from john(a)metanate.com are
queue-4.4/usb-gadget-configs-plug-memory-leak.patch
This is a note to let you know that I've just added the patch titled
USB: gadgetfs: Fix a potential memory leak in 'dev_config()'
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
usb-gadgetfs-fix-a-potential-memory-leak-in-dev_config.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Date: Tue, 21 Feb 2017 22:33:11 +0100
Subject: USB: gadgetfs: Fix a potential memory leak in 'dev_config()'
From: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
[ Upstream commit b6e7aeeaf235901c42ec35de4633c7c69501d303 ]
'kbuf' is allocated just a few lines above using 'memdup_user()'.
If the 'if (dev->buf)' test fails, this memory is never released.
Signed-off-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Signed-off-by: Felipe Balbi <felipe.balbi(a)linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/usb/gadget/legacy/inode.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1837,8 +1837,10 @@ dev_config (struct file *fd, const char
spin_lock_irq (&dev->lock);
value = -EINVAL;
- if (dev->buf)
+ if (dev->buf) {
+ kfree(kbuf);
goto fail;
+ }
dev->buf = kbuf;
/* full or low speed config */
Patches currently in stable-queue which might be from christophe.jaillet(a)wanadoo.fr are
queue-4.4/usb-gadgetfs-fix-a-potential-memory-leak-in-dev_config.patch
This is a note to let you know that I've just added the patch titled
spi_ks8995: fix "BUG: key accdaa28 not in .data!"
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
spi_ks8995-fix-bug-key-accdaa28-not-in-.data.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: "Blomme, Maarten" <Maarten.Blomme(a)flir.com>
Date: Thu, 2 Mar 2017 13:08:36 +0100
Subject: spi_ks8995: fix "BUG: key accdaa28 not in .data!"
From: "Blomme, Maarten" <Maarten.Blomme(a)flir.com>
[ Upstream commit 4342696df764ec65dcdfbd0c10d90ea52505f8ba ]
Signed-off-by: Maarten Blomme <Maarten.Blomme(a)flir.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/phy/spi_ks8995.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -310,6 +310,7 @@ static int ks8995_probe(struct spi_devic
if (err)
return err;
+ sysfs_attr_init(&ks->regs_attr.attr);
err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr);
if (err) {
dev_err(&spi->dev, "unable to create sysfs file, err=%d\n",
Patches currently in stable-queue which might be from Maarten.Blomme(a)flir.com are
queue-4.4/spi_ks8995-fix-bug-key-accdaa28-not-in-.data.patch
This is a note to let you know that I've just added the patch titled
sunrpc: Fix rpc_task_begin trace point
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
sunrpc-fix-rpc_task_begin-trace-point.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Fri, 3 Nov 2017 13:46:06 -0400
Subject: sunrpc: Fix rpc_task_begin trace point
From: Chuck Lever <chuck.lever(a)oracle.com>
[ Upstream commit b2bfe5915d5fe7577221031a39ac722a0a2a1199 ]
The rpc_task_begin trace point always display a task ID of zero.
Move the trace point call site so that it picks up the new task ID.
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker(a)Netapp.com>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/sunrpc/sched.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -273,10 +273,9 @@ static inline void rpc_task_set_debuginf
static void rpc_set_active(struct rpc_task *task)
{
- trace_rpc_task_begin(task->tk_client, task, NULL);
-
rpc_task_set_debuginfo(task);
set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
+ trace_rpc_task_begin(task->tk_client, task, NULL);
}
/*
Patches currently in stable-queue which might be from chuck.lever(a)oracle.com are
queue-4.4/sunrpc-fix-rpc_task_begin-trace-point.patch
This is a note to let you know that I've just added the patch titled
sparc64/mm: set fields in deferred pages
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
sparc64-mm-set-fields-in-deferred-pages.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Pavel Tatashin <pasha.tatashin(a)oracle.com>
Date: Wed, 15 Nov 2017 17:36:18 -0800
Subject: sparc64/mm: set fields in deferred pages
From: Pavel Tatashin <pasha.tatashin(a)oracle.com>
[ Upstream commit 2a20aa171071a334d80c4e5d5af719d8374702fc ]
Without deferred struct page feature (CONFIG_DEFERRED_STRUCT_PAGE_INIT),
flags and other fields in "struct page"es are never changed prior to
first initializing struct pages by going through __init_single_page().
With deferred struct page feature enabled there is a case where we set
some fields prior to initializing:
mem_init() {
register_page_bootmem_info();
free_all_bootmem();
...
}
When register_page_bootmem_info() is called only non-deferred struct
pages are initialized. But, this function goes through some reserved
pages which might be part of the deferred, and thus are not yet
initialized.
mem_init
register_page_bootmem_info
register_page_bootmem_info_node
get_page_bootmem
.. setting fields here ..
such as: page->freelist = (void *)type;
free_all_bootmem()
free_low_memory_core_early()
for_each_reserved_mem_region()
reserve_bootmem_region()
init_reserved_page() <- Only if this is deferred reserved page
__init_single_pfn()
__init_single_page()
memset(0) <-- Loose the set fields here
We end up with similar issue as in the previous patch, where currently
we do not observe problem as memory is zeroed. But, if flag asserts are
changed we can start hitting issues.
Also, because in this patch series we will stop zeroing struct page
memory during allocation, we must make sure that struct pages are
properly initialized prior to using them.
The deferred-reserved pages are initialized in free_all_bootmem().
Therefore, the fix is to switch the above calls.
Link: http://lkml.kernel.org/r/20171013173214.27300-4-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin(a)oracle.com>
Reviewed-by: Steven Sistare <steven.sistare(a)oracle.com>
Reviewed-by: Daniel Jordan <daniel.m.jordan(a)oracle.com>
Reviewed-by: Bob Picco <bob.picco(a)oracle.com>
Acked-by: David S. Miller <davem(a)davemloft.net>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Andrey Ryabinin <aryabinin(a)virtuozzo.com>
Cc: Ard Biesheuvel <ard.biesheuvel(a)linaro.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Christian Borntraeger <borntraeger(a)de.ibm.com>
Cc: Dmitry Vyukov <dvyukov(a)google.com>
Cc: Heiko Carstens <heiko.carstens(a)de.ibm.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Sam Ravnborg <sam(a)ravnborg.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Will Deacon <will.deacon(a)arm.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/sparc/mm/init_64.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2402,10 +2402,17 @@ void __init mem_init(void)
{
high_memory = __va(last_valid_pfn << PAGE_SHIFT);
- register_page_bootmem_info();
free_all_bootmem();
/*
+ * Must be done after boot memory is put on freelist, because here we
+ * might set fields in deferred struct pages that have not yet been
+ * initialized, and free_all_bootmem() initializes all the reserved
+ * deferred pages for us.
+ */
+ register_page_bootmem_info();
+
+ /*
* Set up the zero page, mark it reserved, so that page count
* is not manipulated when freeing the page from user ptes.
*/
Patches currently in stable-queue which might be from pasha.tatashin(a)oracle.com are
queue-4.4/sparc64-mm-set-fields-in-deferred-pages.patch
This is a note to let you know that I've just added the patch titled
sctp: use the right sk after waking up from wait_buf sleep
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
sctp-use-the-right-sk-after-waking-up-from-wait_buf-sleep.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Xin Long <lucien.xin(a)gmail.com>
Date: Wed, 15 Nov 2017 16:57:26 +0800
Subject: sctp: use the right sk after waking up from wait_buf sleep
From: Xin Long <lucien.xin(a)gmail.com>
[ Upstream commit cea0cc80a6777beb6eb643d4ad53690e1ad1d4ff ]
Commit dfcb9f4f99f1 ("sctp: deny peeloff operation on asocs with threads
sleeping on it") fixed the race between peeloff and wait sndbuf by
checking waitqueue_active(&asoc->wait) in sctp_do_peeloff().
But it actually doesn't work, as even if waitqueue_active returns false
the waiting sndbuf thread may still not yet hold sk lock. After asoc is
peeled off, sk is not asoc->base.sk any more, then to hold the old sk
lock couldn't make assoc safe to access.
This patch is to fix this by changing to hold the new sk lock if sk is
not asoc->base.sk, meanwhile, also set the sk in sctp_sendmsg with the
new sk.
With this fix, there is no more race between peeloff and waitbuf, the
check 'waitqueue_active' in sctp_do_peeloff can be removed.
Thanks Marcelo and Neil for making this clear.
v1->v2:
fix it by changing to lock the new sock instead of adding a flag in asoc.
Suggested-by: Neil Horman <nhorman(a)tuxdriver.com>
Signed-off-by: Xin Long <lucien.xin(a)gmail.com>
Acked-by: Neil Horman <nhorman(a)tuxdriver.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/sctp/socket.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -82,8 +82,8 @@
/* Forward declarations for internal helper functions. */
static int sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
-static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p,
- size_t msg_len);
+static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
+ size_t msg_len, struct sock **orig_sk);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -1953,7 +1953,8 @@ static int sctp_sendmsg(struct sock *sk,
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
+ /* sk can be changed by peel off when waiting for buf. */
+ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
if (err) {
if (err == -ESRCH) {
/* asoc is already dead. */
@@ -4466,12 +4467,6 @@ int sctp_do_peeloff(struct sock *sk, sct
if (!asoc)
return -EINVAL;
- /* If there is a thread waiting on more sndbuf space for
- * sending on this asoc, it cannot be peeled.
- */
- if (waitqueue_active(&asoc->wait))
- return -EBUSY;
-
/* An association cannot be branched off from an already peeled-off
* socket, nor is this supported for tcp style sockets.
*/
@@ -6981,7 +6976,7 @@ void sctp_sock_rfree(struct sk_buff *skb
/* Helper function to wait for space in the sndbuf. */
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
- size_t msg_len)
+ size_t msg_len, struct sock **orig_sk)
{
struct sock *sk = asoc->base.sk;
int err = 0;
@@ -7015,11 +7010,17 @@ static int sctp_wait_for_sndbuf(struct s
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
lock_sock(sk);
+ if (sk != asoc->base.sk) {
+ release_sock(sk);
+ sk = asoc->base.sk;
+ lock_sock(sk);
+ }
*timeo_p = current_timeo;
}
out:
+ *orig_sk = sk;
finish_wait(&asoc->wait, &wait);
/* Release the association's refcnt. */
Patches currently in stable-queue which might be from lucien.xin(a)gmail.com are
queue-4.4/route-update-fnhe_expires-for-redirect-when-the-fnhe-exists.patch
queue-4.4/route-also-update-fnhe_genid-when-updating-a-route-cache.patch
queue-4.4/sctp-use-the-right-sk-after-waking-up-from-wait_buf-sleep.patch
queue-4.4/sctp-do-not-free-asoc-when-it-is-already-dead-in-sctp_sendmsg.patch
This is a note to let you know that I've just added the patch titled
selftest/powerpc: Fix false failures for skipped tests
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
selftest-powerpc-fix-false-failures-for-skipped-tests.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Sachin Sant <sachinp(a)linux.vnet.ibm.com>
Date: Sun, 26 Feb 2017 11:38:39 +0530
Subject: selftest/powerpc: Fix false failures for skipped tests
From: Sachin Sant <sachinp(a)linux.vnet.ibm.com>
[ Upstream commit a6d8a21596df041f36f4c2ccc260c459e3e851f1 ]
Tests under alignment subdirectory are skipped when executed on previous
generation hardware, but harness still marks them as failed.
test: test_copy_unaligned
tags: git_version:unknown
[SKIP] Test skipped on line 26
skip: test_copy_unaligned
selftests: copy_unaligned [FAIL]
The MAGIC_SKIP_RETURN_VALUE value assigned to rc variable is retained till
the program exit which causes the test to be marked as failed.
This patch resets the value before returning to the main() routine.
With this patch the test o/p is as follows:
test: test_copy_unaligned
tags: git_version:unknown
[SKIP] Test skipped on line 26
skip: test_copy_unaligned
selftests: copy_unaligned [PASS]
Signed-off-by: Sachin Sant <sachinp(a)linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
tools/testing/selftests/powerpc/harness.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -109,9 +109,11 @@ int test_harness(int (test_function)(voi
rc = run_test(test_function, name);
- if (rc == MAGIC_SKIP_RETURN_VALUE)
+ if (rc == MAGIC_SKIP_RETURN_VALUE) {
test_skip(name);
- else
+ /* so that skipped test is not marked as failed */
+ rc = 0;
+ } else
test_finish(name, rc);
return rc;
Patches currently in stable-queue which might be from sachinp(a)linux.vnet.ibm.com are
queue-4.4/module-set-__jump_table-alignment-to-8.patch
queue-4.4/selftest-powerpc-fix-false-failures-for-skipped-tests.patch
This is a note to let you know that I've just added the patch titled
sctp: do not free asoc when it is already dead in sctp_sendmsg
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
sctp-do-not-free-asoc-when-it-is-already-dead-in-sctp_sendmsg.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Tue Dec 12 13:38:50 CET 2017
From: Xin Long <lucien.xin(a)gmail.com>
Date: Wed, 15 Nov 2017 16:55:54 +0800
Subject: sctp: do not free asoc when it is already dead in sctp_sendmsg
From: Xin Long <lucien.xin(a)gmail.com>
[ Upstream commit ca3af4dd28cff4e7216e213ba3b671fbf9f84758 ]
Now in sctp_sendmsg sctp_wait_for_sndbuf could schedule out without
holding sock sk. It means the current asoc can be freed elsewhere,
like when receiving an abort packet.
If the asoc is just created in sctp_sendmsg and sctp_wait_for_sndbuf
returns err, the asoc will be freed again due to new_asoc is not nil.
An use-after-free issue would be triggered by this.
This patch is to fix it by setting new_asoc with nil if the asoc is
already dead when cpu schedules back, so that it will not be freed
again in sctp_sendmsg.
v1->v2:
set new_asoc as nil in sctp_sendmsg instead of sctp_wait_for_sndbuf.
Suggested-by: Neil Horman <nhorman(a)tuxdriver.com>
Reported-by: Dmitry Vyukov <dvyukov(a)google.com>
Signed-off-by: Xin Long <lucien.xin(a)gmail.com>
Acked-by: Neil Horman <nhorman(a)tuxdriver.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin(a)verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/sctp/socket.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1954,8 +1954,14 @@ static int sctp_sendmsg(struct sock *sk,
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
if (!sctp_wspace(asoc)) {
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
- if (err)
+ if (err) {
+ if (err == -ESRCH) {
+ /* asoc is already dead. */
+ new_asoc = NULL;
+ err = -EPIPE;
+ }
goto out_free;
+ }
}
/* If an address is passed with the sendto/sendmsg call, it is used
@@ -6992,10 +6998,11 @@ static int sctp_wait_for_sndbuf(struct s
for (;;) {
prepare_to_wait_exclusive(&asoc->wait, &wait,
TASK_INTERRUPTIBLE);
+ if (asoc->base.dead)
+ goto do_dead;
if (!*timeo_p)
goto do_nonblock;
- if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
- asoc->base.dead)
+ if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING)
goto do_error;
if (signal_pending(current))
goto do_interrupted;
@@ -7020,6 +7027,10 @@ out:
return err;
+do_dead:
+ err = -ESRCH;
+ goto out;
+
do_error:
err = -EPIPE;
goto out;
Patches currently in stable-queue which might be from lucien.xin(a)gmail.com are
queue-4.4/route-update-fnhe_expires-for-redirect-when-the-fnhe-exists.patch
queue-4.4/route-also-update-fnhe_genid-when-updating-a-route-cache.patch
queue-4.4/sctp-use-the-right-sk-after-waking-up-from-wait_buf-sleep.patch
queue-4.4/sctp-do-not-free-asoc-when-it-is-already-dead-in-sctp_sendmsg.patch