From: Ronnie Sahlberg <lsahlber@redhat.com>
[ Upstream commit e6c47dd0da1e3a484e778046fc10da0b20606a86 ]
Some SMB2/3 servers, Win2016 but possibly others too, add padding
not only between PDUs in a compound but also to the final PDU.
This padding extends the PDU to a multiple of 8 bytes.
Check whether the unexpected length looks like this is the case
and avoid triggering the log message:
"SMB2 server sent bad RFC1001 len %d not %d\n"
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
---
fs/cifs/smb2misc.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 3ff7cec2da81..239215dcc00b 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -240,6 +240,13 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
if (clc_len == len + 1)
return 0;
+ /*
+ * Some Windows servers (Win2016) will also pad the final
+ * PDU in a compound to a multiple of 8 bytes.
+ */
+ if (((clc_len + 7) & ~7) == len)
+ return 0;
+
/*
* MacOS server pads after SMB2.1 write response with 3 bytes
* of junk. Other servers match RFC1001 len to actual
--
2.17.1
From: Ralph Campbell <rcampbell@nvidia.com>
Private ZONE_DEVICE pages use a special pte entry and thus are not
present. Properly handle this case in map_pte(); it is already handled
in check_pte(), but the map_pte() part was most probably lost in a
rebase.
Without this patch the slow migration path cannot migrate private
ZONE_DEVICE memory back to regular memory. This was found after stress
testing migration back to system memory. Ultimately it can lead the
CPU into an infinite page-fault loop on the special swap entry.
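For background, migration installs a device-private page behind a
swap-type pte, roughly like this condensed sketch (my paraphrase of
the migrate path, not a quote of kernel code):

	swp_entry_t entry = make_device_private_entry(page, is_write);
	set_pte_at(mm, addr, ptep, swp_entry_to_pte(entry));

Such a pte fails pte_present() even though the page is still logically
mapped, which is why map_pte() needs the device-private check before
its pte_present() test in the hunk below.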
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: stable@vger.kernel.org
---
mm/page_vma_mapped.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index ae3c2a35d61b..1cf5b9bfb559 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -21,6 +21,15 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
if (!is_swap_pte(*pvmw->pte))
return false;
} else {
+ if (is_swap_pte(*pvmw->pte)) {
+ swp_entry_t entry;
+
+ /* Handle un-addressable ZONE_DEVICE memory */
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (is_device_private_entry(entry))
+ return true;
+ }
+
if (!pte_present(*pvmw->pte))
return false;
}
--
2.17.1
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 024d83cadc6b2af027e473720f3c3da97496c318 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 12 Aug 2018 20:41:45 +0200
Subject: [PATCH] KVM: x86: SVM: Call x86_spec_ctrl_set_guest/host() with
interrupts disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Mikhail reported the following lockdep splat:
WARNING: possible irq lock inversion dependency detected
CPU 0/KVM/10284 just changed the state of lock:
000000000d538a88 (&st->lock){+...}, at:
speculative_store_bypass_update+0x10b/0x170
but this lock was taken by another, HARDIRQ-safe lock
in the past:
(&(&sighand->siglock)->rlock){-.-.}
and interrupts could create inverse lock ordering between them.
Possible interrupt unsafe locking scenario:

      CPU0                    CPU1
      ----                    ----
 lock(&st->lock);
                              local_irq_disable();
                              lock(&(&sighand->siglock)->rlock);
                              lock(&st->lock);
 <Interrupt>
   lock(&(&sighand->siglock)->rlock);

 *** DEADLOCK ***
The code path which connects those locks is:

   speculative_store_bypass_update()
     ssb_prctl_set()
       do_seccomp()
         do_syscall_64()
In svm_vcpu_run() speculative_store_bypass_update() is called with
interrupts enabled via x86_virt_spec_ctrl_set_guest/host().
This is actually a false positive, because GIF=0 so interrupts are
disabled even if IF=1; however, we can easily move the invocations of
x86_virt_spec_ctrl_set_guest/host() into the interrupt disabled region to
cure it, and it's a good idea to keep the GIF=0/IF=1 area as small
and self-contained as possible.
Fixes: 1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD")
Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: kvm@vger.kernel.org
Cc: x86@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 73e27a98456f..6276140044d0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5586,8 +5586,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
clgi();
- local_irq_enable();
-
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
* it's non-zero. Since vmentry is serialising on affected CPUs, there
@@ -5596,6 +5594,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
*/
x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
+ local_irq_enable();
+
asm volatile (
"push %%" _ASM_BP "; \n\t"
"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
@@ -5718,12 +5718,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
-
reload_tss(vcpu);
local_irq_disable();
+ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
+
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
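For clarity, this is the ordering svm_vcpu_run() ends up with after
the change, condensed from the two hunks above (everything in between
elided):

	clgi();                    /* GIF=0: interrupts held off */
	x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
	local_irq_enable();        /* IF=1, but GIF=0 still blocks */
	/* ... VMRUN and guest state save/restore ... */
	reload_tss(vcpu);
	local_irq_disable();
	x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);

Both SPEC_CTRL updates now run inside the interrupt-disabled region,
which silences the lockdep inversion report.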
In the __getcpu function, lsl was using its source and destination
operands in the wrong order. Luckily, the compiler tends to choose
%eax for both variables, so it has been working so far.
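In AT&T syntax the destination operand comes last, so loading the
segment limit of selector seg into p must be written
"lsl %[seg],%[p]". A minimal standalone illustration (a toy example of
mine, not the kernel code; LSL leaves the destination untouched for an
invalid selector, error handling elided):

	unsigned int limit, sel = 0x33; /* 0x33 is the 64-bit user %cs */
	asm("lsl %1, %0" : "=r" (limit) : "r" (sel));
	/* limit now holds the segment limit of sel */

With the operands reversed, the instruction computes the limit of the
wrong register; it only appeared to work because the compiler tended
to pick %eax for both p and seg.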
Cc: x86@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
---
arch/x86/include/asm/vgtod.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index fb856c9f0449..53748541c487 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -93,7 +93,7 @@ static inline unsigned int __getcpu(void)
*
* If RDPID is available, use it.
*/
- alternative_io ("lsl %[p],%[seg]",
+ alternative_io ("lsl %[seg],%[p]",
".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
X86_FEATURE_RDPID,
[p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
--
2.17.1
From: Jason Wang <jasowang@redhat.com>
commit b196d88aba8ac72b775137854121097f4c4c6862 upstream.
We used to initialize the ptr_ring during TUNSETIFF because its size
depends on the tx_queue_len of the netdevice, and we tried to clean it
up when the socket was detached from the netdevice. A race was spotted
where an uninit during a read leads to a use-after-free of the pointer
ring. Solve this by always initializing a zero-size ptr_ring in open()
and resizing it during TUNSETIFF; then we can safely do the cleanup
during close(). With this, there's no need for the workaround that was
introduced by commit 4df0bfc79904 ("tun: fix a memory leak
for tfile->tx_array").
Backport note:
This is a backport of the following two upstream patches (the second
fixes the first):
b196d88aba ("tun: fix use after free for ptr_ring")
7063efd33b ("tuntap: fix use after free during release")
Comparison with the upstream patch:
[1] A "semantic revert" of the changes made in
4df0bfc799 ("tun: fix a memory leak for tfile->tx_array").
4df0bfc799 was applied upstream, and the skb array was later changed
to use ptr_ring. The upstream fix removes the changes introduced
by 4df0bfc799. This backport does the same: it "reverts" the changes
made by 4df0bfc799.
[2] xdp_rxq_info_unreg() being called in relevant locations
As xdp_rxq_info related patches are not present in 4.14, these
changes are not needed in the backport.
[3] An instance of ptr_ring_init needs to be replaced by skb_array_init.
[4] ptr_ring_cleanup needs to be replaced by skb_array_cleanup.
b196d88ab places the cleanup function in tun_chr_close() only to
later move it into __tun_detach in upstream commit
7063efd33bb ("tuntap: fix use after free during release"). So
place skb_array_cleanup in __tun_detach.
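Taken together, the lifecycle this backport ends up with (condensed
from the diff below) is:

	tun_chr_open():  skb_array_init(&tfile->tx_array, 0, GFP_KERNEL);
	                 /* ring always valid, just zero-sized */
	tun_attach():    skb_array_resize(&tfile->tx_array,
	                                  dev->tx_queue_len, GFP_KERNEL);
	__tun_detach():  skb_array_cleanup(&tfile->tx_array);
	                 /* single, unconditional cleanup */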
Reported-by: syzbot+e8b902c3c3fadf0a9dba@syzkaller.appspotmail.com
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Fixes: 1576d9860599 ("tun: switch to use skb array for tx")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Zubin Mithra <zsm@chromium.org>
---
drivers/net/tun.c | 21 +++++++--------------
1 file changed, 7 insertions(+), 14 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index cb17ffadfc30..e0baea2dfd3c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -534,14 +534,6 @@ static void tun_queue_purge(struct tun_file *tfile)
skb_queue_purge(&tfile->sk.sk_error_queue);
}
-static void tun_cleanup_tx_array(struct tun_file *tfile)
-{
- if (tfile->tx_array.ring.queue) {
- skb_array_cleanup(&tfile->tx_array);
- memset(&tfile->tx_array, 0, sizeof(tfile->tx_array));
- }
-}
-
static void __tun_detach(struct tun_file *tfile, bool clean)
{
struct tun_file *ntfile;
@@ -583,7 +575,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->dev->reg_state == NETREG_REGISTERED)
unregister_netdevice(tun->dev);
}
- tun_cleanup_tx_array(tfile);
+ skb_array_cleanup(&tfile->tx_array);
sock_put(&tfile->sk);
}
}
@@ -623,13 +615,11 @@ static void tun_detach_all(struct net_device *dev)
/* Drop read queue */
tun_queue_purge(tfile);
sock_put(&tfile->sk);
- tun_cleanup_tx_array(tfile);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
tun_enable_queue(tfile);
tun_queue_purge(tfile);
sock_put(&tfile->sk);
- tun_cleanup_tx_array(tfile);
}
BUG_ON(tun->numdisabled != 0);
@@ -675,7 +665,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
}
if (!tfile->detached &&
- skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) {
+ skb_array_resize(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) {
err = -ENOMEM;
goto out;
}
@@ -2624,6 +2614,11 @@ static int tun_chr_open(struct inode *inode, struct file * file)
&tun_proto, 0);
if (!tfile)
return -ENOMEM;
+ if (skb_array_init(&tfile->tx_array, 0, GFP_KERNEL)) {
+ sk_free(&tfile->sk);
+ return -ENOMEM;
+ }
+
RCU_INIT_POINTER(tfile->tun, NULL);
tfile->flags = 0;
tfile->ifindex = 0;
@@ -2644,8 +2639,6 @@ static int tun_chr_open(struct inode *inode, struct file * file)
sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
- memset(&tfile->tx_array, 0, sizeof(tfile->tx_array));
-
return 0;
}
--
2.19.0.rc0.228.g281dcd1b4d0-goog