The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 834e772c8db0c6a275d75315d90aba4ebbb1e249 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha(a)redhat.com>
Date: Mon, 5 Nov 2018 10:35:47 +0000
Subject: [PATCH] vhost/vsock: fix use-after-free in network stack callers
If the network stack calls .send_pkt()/.cancel_pkt() during .release(),
a struct vhost_vsock use-after-free is possible. This occurs because
.release() does not wait for other CPUs to stop using struct
vhost_vsock.
Switch to an RCU-enabled hashtable (indexed by guest CID) so that
.release() can wait for other CPUs by calling synchronize_rcu(). This
also eliminates vhost_vsock_lock acquisition in the data path so it
could have a positive effect on performance.
This is CVE-2018-14625 "kernel: use-after-free Read in vhost_transport_send_pkt".
Cc: stable(a)vger.kernel.org
Reported-and-tested-by: syzbot+bd391451452fb0b93039(a)syzkaller.appspotmail.com
Reported-by: syzbot+e3e074963495f92a89ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+d5a0a170c5069658b141(a)syzkaller.appspotmail.com
Signed-off-by: Stefan Hajnoczi <stefanha(a)redhat.com>
Signed-off-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Jason Wang <jasowang(a)redhat.com>
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 731e2ea2aeca..98ed5be132c6 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -15,6 +15,7 @@
#include <net/sock.h>
#include <linux/virtio_vsock.h>
#include <linux/vhost.h>
+#include <linux/hashtable.h>
#include <net/af_vsock.h>
#include "vhost.h"
@@ -27,14 +28,14 @@ enum {
/* Used to track all the vhost_vsock instances on the system. */
static DEFINE_SPINLOCK(vhost_vsock_lock);
-static LIST_HEAD(vhost_vsock_list);
+static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
struct vhost_vsock {
struct vhost_dev dev;
struct vhost_virtqueue vqs[2];
- /* Link to global vhost_vsock_list, protected by vhost_vsock_lock */
- struct list_head list;
+ /* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */
+ struct hlist_node hash;
struct vhost_work send_pkt_work;
spinlock_t send_pkt_list_lock;
@@ -50,11 +51,14 @@ static u32 vhost_transport_get_local_cid(void)
return VHOST_VSOCK_DEFAULT_HOST_CID;
}
-static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid)
+/* Callers that dereference the return value must hold vhost_vsock_lock or the
+ * RCU read lock.
+ */
+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
{
struct vhost_vsock *vsock;
- list_for_each_entry(vsock, &vhost_vsock_list, list) {
+ hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
u32 other_cid = vsock->guest_cid;
/* Skip instances that have no CID yet */
@@ -69,17 +73,6 @@ static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid)
return NULL;
}
-static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
-{
- struct vhost_vsock *vsock;
-
- spin_lock_bh(&vhost_vsock_lock);
- vsock = __vhost_vsock_get(guest_cid);
- spin_unlock_bh(&vhost_vsock_lock);
-
- return vsock;
-}
-
static void
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
struct vhost_virtqueue *vq)
@@ -210,9 +203,12 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
struct vhost_vsock *vsock;
int len = pkt->len;
+ rcu_read_lock();
+
/* Find the vhost_vsock according to guest context id */
vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
if (!vsock) {
+ rcu_read_unlock();
virtio_transport_free_pkt(pkt);
return -ENODEV;
}
@@ -225,6 +221,8 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
spin_unlock_bh(&vsock->send_pkt_list_lock);
vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+
+ rcu_read_unlock();
return len;
}
@@ -234,12 +232,15 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
struct vhost_vsock *vsock;
struct virtio_vsock_pkt *pkt, *n;
int cnt = 0;
+ int ret = -ENODEV;
LIST_HEAD(freeme);
+ rcu_read_lock();
+
/* Find the vhost_vsock according to guest context id */
vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
if (!vsock)
- return -ENODEV;
+ goto out;
spin_lock_bh(&vsock->send_pkt_list_lock);
list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
@@ -265,7 +266,10 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
vhost_poll_queue(&tx_vq->poll);
}
- return 0;
+ ret = 0;
+out:
+ rcu_read_unlock();
+ return ret;
}
static struct virtio_vsock_pkt *
@@ -533,10 +537,6 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
spin_lock_init(&vsock->send_pkt_list_lock);
INIT_LIST_HEAD(&vsock->send_pkt_list);
vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
-
- spin_lock_bh(&vhost_vsock_lock);
- list_add_tail(&vsock->list, &vhost_vsock_list);
- spin_unlock_bh(&vhost_vsock_lock);
return 0;
out:
@@ -585,9 +585,13 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
struct vhost_vsock *vsock = file->private_data;
spin_lock_bh(&vhost_vsock_lock);
- list_del(&vsock->list);
+ if (vsock->guest_cid)
+ hash_del_rcu(&vsock->hash);
spin_unlock_bh(&vhost_vsock_lock);
+ /* Wait for other CPUs to finish using vsock */
+ synchronize_rcu();
+
/* Iterating over all connections for all CIDs to find orphans is
* inefficient. Room for improvement here. */
vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
@@ -628,12 +632,17 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
/* Refuse if CID is already in use */
spin_lock_bh(&vhost_vsock_lock);
- other = __vhost_vsock_get(guest_cid);
+ other = vhost_vsock_get(guest_cid);
if (other && other != vsock) {
spin_unlock_bh(&vhost_vsock_lock);
return -EADDRINUSE;
}
+
+ if (vsock->guest_cid)
+ hash_del_rcu(&vsock->hash);
+
vsock->guest_cid = guest_cid;
+ hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid);
spin_unlock_bh(&vhost_vsock_lock);
return 0;
On Mon, 10 Dec 2018 21:09:20 +0000
Sasha Levin <sashal(a)kernel.org> wrote:
> Hi,
>
> [This is an automated email]
>
> This commit has been processed because it contains a "Fixes:" tag,
> fixing commit: .
>
> The bot has tested the following trees: v4.19.8, v4.14.87, v4.9.144, v4.4.166, v3.18.128,
>
> v4.19.8: Build OK!
> v4.14.87: Failed to apply! Possible dependencies:
> 37db96bb4962 ("tools lib traceevent: Handle new pointer processing of bprint strings")
Bah, I cut and pasted incorrectly the sha1. I had:
7db96bb49629 (chopped off the 3)
-- Steve
>
> v4.9.144: Failed to apply! Possible dependencies:
> 37db96bb4962 ("tools lib traceevent: Handle new pointer processing of bprint strings")
>
> v4.4.166: Failed to apply! Possible dependencies:
> 37db96bb4962 ("tools lib traceevent: Handle new pointer processing of bprint strings")
>
> v3.18.128: Failed to apply! Possible dependencies:
> 37db96bb4962 ("tools lib traceevent: Handle new pointer processing of bprint strings")
> 38d70b7ca176 ("tools lib traceevent: Simplify pointer print logic and fix %pF")
> 3d199b5be533 ("tools lib traceevent: Add support for IP address formats")
> b6bd9c7d543a ("tools lib traceevent: Support %ps/%pS")
>
>
> How should we proceed with this patch?
>
> --
> Thanks,
> Sasha
From: Jens Axboe <axboe(a)kernel.dk>
commit f7068114d45ec55996b9040e98111afa56e010fe upstream.
We're casting the CDROM layer request_sense to the SCSI sense
buffer, but the former is 64 bytes and the latter is 96 bytes.
As we generally allocate these on the stack, we end up blowing
up the stack.
Fix this by wrapping the scsi_execute() call with a properly
sized sense buffer, and copying back the bits for the CDROM
layer.
Reported-by: Piotr Gabriel Kosinski <pg.kosinski(a)gmail.com>
Reported-by: Daniel Shapira <daniel(a)twistlock.com>
Tested-by: Kees Cook <keescook(a)chromium.org>
Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request")
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
[bwh: Despite what the "Fixes" field says, a buffer overrun was already
possible if the sense data was really > 64 bytes long.
Backported to 4.9:
- We always need to allocate a sense buffer in order to call
scsi_normalize_sense()
- Remove the existing conditional heap-allocation of the sense buffer]
Signed-off-by: Ben Hutchings <ben.hutchings(a)codethink.co.uk>
---
drivers/scsi/sr_ioctl.c | 21 +++++++--------------
1 file changed, 7 insertions(+), 14 deletions(-)
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index 03054c0e7689..3c3e8115f73d 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -187,30 +187,25 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
struct scsi_device *SDev;
struct scsi_sense_hdr sshdr;
int result, err = 0, retries = 0;
- struct request_sense *sense = cgc->sense;
+ unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE];
SDev = cd->device;
- if (!sense) {
- sense = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL);
- if (!sense) {
- err = -ENOMEM;
- goto out;
- }
- }
-
retry:
if (!scsi_block_when_processing_errors(SDev)) {
err = -ENODEV;
goto out;
}
- memset(sense, 0, sizeof(*sense));
+ memset(sense_buffer, 0, sizeof(sense_buffer));
result = scsi_execute(SDev, cgc->cmd, cgc->data_direction,
- cgc->buffer, cgc->buflen, (char *)sense,
+ cgc->buffer, cgc->buflen, sense_buffer,
cgc->timeout, IOCTL_RETRIES, 0, NULL);
- scsi_normalize_sense((char *)sense, sizeof(*sense), &sshdr);
+ scsi_normalize_sense(sense_buffer, sizeof(sense_buffer), &sshdr);
+
+ if (cgc->sense)
+ memcpy(cgc->sense, sense_buffer, sizeof(*cgc->sense));
/* Minimal error checking. Ignore cases we know about, and report the rest. */
if (driver_byte(result) != 0) {
@@ -261,8 +256,6 @@ int sr_do_ioctl(Scsi_CD *cd, struct packet_command *cgc)
/* Wake up a process waiting for device */
out:
- if (!cgc->sense)
- kfree(sense);
cgc->stat = err;
return err;
}
--
Ben Hutchings, Software Developer Codethink Ltd
https://www.codethink.co.uk/ Dale House, 35 Dale Street
Manchester, M1 2HF, United Kingdom
The user triggers the creation of a pseudo-locked region when writing
the requested schemata to the schemata resctrl file. The pseudo-locking
of a region is required to be done on a CPU that is associated with the
cache on which the pseudo-locked region will reside. In order to run the
locking code on a specific CPU the needed CPU has to be selected and
ensured to remain online during the entire locking sequence.
At this time the cpu_hotplug_lock is not taken during the pseudo-lock
region creation and it is thus possible for a CPU to be selected to run
the pseudo-locking code and then that CPU to go offline before the
thread is able to run on it.
Fix this by ensuring that the cpu_hotplug_lock is taken while the CPU on
which code has to run needs to be controlled. Since the cpu_hotplug_lock
is always taken before rdtgroup_mutex the lock order is maintained.
Fixes: e0bdfe8e36f3 ("x86/intel_rdt: Support creation/removal of pseudo-locked region")
Signed-off-by: Reinette Chatre <reinette.chatre(a)intel.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 03ee13235a45..6f7adb3be01e 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -23,6 +23,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cpu.h>
#include <linux/kernfs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -381,9 +382,11 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
return -EINVAL;
buf[nbytes - 1] = '\0';
+ cpus_read_lock();
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
rdtgroup_kn_unlock(of->kn);
+ cpus_read_unlock();
return -ENOENT;
}
rdt_last_cmd_clear();
@@ -438,6 +441,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
out:
rdtgroup_kn_unlock(of->kn);
+ cpus_read_unlock();
return ret ?: nbytes;
}
--
2.17.0
From: "Steven Rostedt (VMware)" <rostedt(a)goodmis.org>
When create_event_filter() fails in set_trigger_filter(), the filter may
still be allocated and needs to be freed. The caller expects the
data->filter to be updated with the new filter, even if the new filter
failed (we could add an error message by setting set_str parameter of
create_event_filter(), but that's another update).
But because the error would just exit, filter was left hanging and
nothing could free it.
Found by kmemleak detector.
Cc: Tom Zanussi <tom.zanussi(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Fixes: bac5fb97a173a ("tracing: Add and use generic set_trigger_filter() implementation")
Signed-off-by: Steven Rostedt (VMware) <rostedt(a)goodmis.org>
---
kernel/trace/trace_events_trigger.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 2152d1e530cb..cd12ecb66eb9 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -732,8 +732,10 @@ int set_trigger_filter(char *filter_str,
/* The filter is for the 'trigger' event, not the triggered event */
ret = create_event_filter(file->event_call, filter_str, false, &filter);
- if (ret)
- goto out;
+ /*
+ * If create_event_filter() fails, filter still needs to be freed.
+ * Which the calling code will do with data->filter.
+ */
assign:
tmp = rcu_access_pointer(data->filter);
--
2.19.1