... Another design detail is that listening vsocks are not supposed to have any transport assigned at all, which implies they are not supported by the sockmap. This is complicated by the fact that a socket, before switching to TCP_LISTEN, may have had some transport assigned during a failed connect() attempt. Hence, we may end up with a listening vsock in a sockmap, which blows up quickly:
KASAN: null-ptr-deref in range [0x0000000000000120-0x0000000000000127]
CPU: 7 UID: 0 PID: 56 Comm: kworker/7:0 Not tainted 6.14.0-rc1+
Workqueue: vsock-loopback vsock_loopback_work
RIP: 0010:vsock_read_skb+0x4b/0x90
Call Trace:
 sk_psock_verdict_data_ready+0xa4/0x2e0
 virtio_transport_recv_pkt+0x1ca8/0x2acc
 vsock_loopback_work+0x27d/0x3f0
 process_one_work+0x846/0x1420
 worker_thread+0x5b3/0xf80
 kthread+0x35a/0x700
 ret_from_fork+0x2d/0x70
 ret_from_fork_asm+0x1a/0x30
Perhaps I should have expanded more on the null-ptr-deref itself.
The idea is: force a vsock into assigning a transport and add it to the sockmap (with a verdict program), but keep it unconnected. Then, drop the transport and set the vsock to TCP_LISTEN. The moment a new connection is established:
virtio_transport_recv_pkt()
  virtio_transport_recv_listen()
    sk->sk_data_ready(sk)            i.e. sk_psock_verdict_data_ready()
      ops->read_skb()                i.e. vsock_read_skb()
        vsk->transport->read_skb()   vsk->transport is NULL, boom
Here's a stand-alone repro:
/*
 * # modprobe -a vsock_loopback vhost_vsock
 * # gcc test.c && ./a.out
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <linux/bpf.h>
#include <linux/vm_sockets.h>
/*
 * Print msg followed by the textual form of the current errno, then
 * terminate the program with exit status -1. Never returns.
 */
static void die(const char *msg)
{
	perror(msg);
	exit(-1);
}
/*
 * Create a BPF_MAP_TYPE_SOCKMAP holding a single entry, with int-sized
 * keys and values.
 *
 * Returns the file descriptor of the new map. Calls die() (and therefore
 * does not return) if the bpf() syscall fails.
 */
static int sockmap_create(void)
{
	union bpf_attr attr;
	int map;

	/*
	 * Zero the whole union explicitly. A brace initializer is only
	 * guaranteed to initialize the member it names; bytes of the union
	 * outside that member may be left indeterminate by the compiler, and
	 * the bpf() syscall is expected to reject non-zero unused bytes.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_SOCKMAP;
	attr.key_size = sizeof(int);
	attr.value_size = sizeof(int);
	attr.max_entries = 1;

	map = syscall(SYS_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map < 0)
		die("map_create");

	return map;
}
/*
 * Store `value` under `key` in the BPF map referred to by `fd`, creating
 * or replacing the entry (BPF_ANY).
 *
 * Calls die() (and therefore does not return) if the bpf() syscall
 * reports an error.
 */
static void map_update_elem(int fd, int key, int value)
{
	union bpf_attr attr;

	/*
	 * Zero the whole union explicitly: a brace initializer does not
	 * guarantee that bytes outside the initialized member are cleared,
	 * and the bpf() syscall is expected to reject non-zero unused bytes.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	/* Go through uintptr_t so the pointer-to-u64 conversion is well-defined
	 * regardless of pointer width. */
	attr.key = (uint64_t)(uintptr_t)&key;
	attr.value = (uint64_t)(uintptr_t)&value;
	attr.flags = BPF_ANY;

	if (syscall(SYS_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)))
		die("map_update_elem");
}
/*
 * Load a two-instruction BPF_PROG_TYPE_SK_SKB program that sets r0 to 1
 * and exits, with an empty license string.
 *
 * Returns the file descriptor of the loaded program. Calls die() (and
 * therefore does not return) if the bpf() syscall fails.
 */
static int prog_load(void)
{
	/* mov %r0, 1; exit */
	struct bpf_insn insns[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = 0, .imm = 1 },
		{ .code = BPF_JMP | BPF_EXIT }
	};
	union bpf_attr attr;
	int prog;

	/*
	 * Zero the whole union explicitly: a brace initializer does not
	 * guarantee that bytes outside the initialized member are cleared,
	 * and the bpf() syscall is expected to reject non-zero unused bytes.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.prog_type = BPF_PROG_TYPE_SK_SKB;
	attr.insn_cnt = sizeof(insns) / sizeof(insns[0]);
	/* Convert via uintptr_t: a cast through (signed) long could
	 * sign-extend the address into the 64-bit field on 32-bit builds. */
	attr.insns = (uint64_t)(uintptr_t)insns;
	attr.license = (uint64_t)(uintptr_t)"";

	prog = syscall(SYS_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
	if (prog < 0)
		die("prog_load");

	return prog;
}
/*
 * Create a BPF link that attaches program `prog_fd` to `target_fd` with
 * attach type BPF_SK_SKB_VERDICT. The resulting link descriptor is not
 * kept.
 *
 * Calls die() (and therefore does not return) if the bpf() syscall fails.
 */
static void link_create(int prog_fd, int target_fd)
{
	union bpf_attr attr;

	/*
	 * Zero the whole union explicitly: a brace initializer does not
	 * guarantee that bytes outside the initialized member are cleared,
	 * and the bpf() syscall is expected to reject non-zero unused bytes.
	 */
	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd = prog_fd;
	attr.link_create.target_fd = target_fd;
	attr.link_create.attach_type = BPF_SK_SKB_VERDICT;

	if (syscall(SYS_bpf, BPF_LINK_CREATE, &attr, sizeof(attr)) < 0)
		die("link_create");
}
/*
 * Reproducer: get an AF_VSOCK socket into a sockmap (with a verdict
 * program attached), turn that same socket into a listener, and then
 * open a new connection to it.
 *
 * The order of the steps below is significant; every step aborts through
 * die() if it does not produce the expected result.
 */
int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_LOCAL,
		.svm_port = VMADDR_PORT_ANY
	};
	socklen_t alen = sizeof(addr);
	int s, map, prog, c;

	s = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
	if (s < 0)
		die("socket");

	if (bind(s, (struct sockaddr *)&addr, alen))
		die("bind");

	/*
	 * This connect() is required to fail with ECONNRESET; success or any
	 * other errno aborts the test.
	 * NOTE(review): presumably this is the step that gets a transport
	 * assigned to s while leaving it unconnected -- confirm.
	 */
	if (!connect(s, (struct sockaddr *)&addr, alen) || errno != ECONNRESET)
		die("connect #1");

	/* Attach the verdict program to a fresh sockmap and insert s at key 0. */
	map = sockmap_create();
	prog = prog_load();
	link_create(prog, map);
	map_update_elem(map, 0, s);

	/*
	 * Second connect(), this time to a CID that does not exist; it is
	 * required to fail with ESOCKTNOSUPPORT.
	 * NOTE(review): presumably this is the step that drops the
	 * previously assigned transport -- confirm.
	 */
	addr.svm_cid = 0x42424242; /* non-existing */
	if (!connect(s, (struct sockaddr *)&addr, alen) || errno != ESOCKTNOSUPPORT)
		die("connect #2");

	/* Turn s, which is still in the sockmap, into a listening socket. */
	if (listen(s, 1))
		die("listen");

	/* Read back the address s is bound to so that c can connect to it. */
	if (getsockname(s, (struct sockaddr *)&addr, &alen))
		die("getsockname");

	c = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
	if (c < 0)
		die("socket c");

	/*
	 * Open a new connection to the listener. Per the accompanying
	 * description, this is the point at which the kernel dereferences the
	 * NULL vsk->transport.
	 */
	if (connect(c, (struct sockaddr *)&addr, alen))
		die("connect #3");

	return 0;
}