User can specify a smaller meta buffer than what the device is wired to update/access. Kernel makes a copy of the meta buffer into which the device does DMA. As a result, the device overwrites the unrelated kernel memory, causing random kernel crashes.
Same issue is possible for extended-lba case also. When user specifies a short unaligned buffer, the kernel makes a copy and uses that for DMA.
Detect these situations and prevent corruption for unprivileged user passthrough. No change to status-quo for privileged/root user.
Fixes: 63263d60e0f9 ("nvme: Use metadata for passthrough commands") Cc: stable@vger.kernel.org
Reported-by: Vincent Fu vincent.fu@samsung.com Signed-off-by: Kanchan Joshi joshi.k@samsung.com --- Changes since v3: - Block only unprivileged user - Harden the checks by disallowing everything for which data length (nlb) can not be determined - Separate the bounce buffer checks to a different function - Factor in CSIs beyond NVM and ZNS
Changes since v2: - Handle extended-lba case: short unaligned buffer IO - Reduce the scope of check to only well-known commands - Do not check early. Move it deeper so that check gets executed less often - Combine two patches into one.
Changes since v1: - Revise the check to exclude PRACT=1 case
drivers/nvme/host/ioctl.c | 116 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index d8ff796fd5f2..57160ca02e65 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -96,6 +96,76 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval) return (void __user *)ptrval; }
+static inline bool nvme_nlb_in_cdw12(struct nvme_ns *ns, u8 opcode) +{ + u8 csi = ns->head->ids.csi; + + if (csi != NVME_CSI_NVM && csi != NVME_CSI_ZNS) + return false; + + switch (opcode) { + case nvme_cmd_read: + case nvme_cmd_write: + case nvme_cmd_compare: + case nvme_cmd_zone_append: + return true; + default: + return false; + } +} + +/* + * NVMe has no separate field to encode the metadata length expected + * (except when using SGLs). + * + * Because of that we can't allow to transfer arbitrary metadata, as + * a metadata buffer that is shorted than what the device expects for + * the command will lead to arbitrary kernel (if bounce buffering) or + * userspace (if not) memory corruption. + * + * Check that external metadata is only specified for the few commands + * where we know the length based of other fields, and that it fits + * the actual data transfer from/to the device. + */ +static bool nvme_validate_metadata_len(struct request *req, unsigned meta_len) +{ + struct nvme_ns *ns = req->q->queuedata; + struct nvme_command *c = nvme_req(req)->cmd; + u32 len_by_nlb; + + /* Do not guard admin */ + if (capable(CAP_SYS_ADMIN)) + return true; + + /* Block commands that do not have nlb in cdw12 */ + if (!nvme_nlb_in_cdw12(ns, c->common.opcode)) { + dev_err(ns->ctrl->device, + "unknown metadata command %c\n", c->common.opcode); + return false; + } + + /* Skip when PI is inserted or stripped and not transferred */ + if (ns->ms == ns->pi_size && + (c->rw.control & cpu_to_le16(NVME_RW_PRINFO_PRACT))) + return true; + + if (ns->features & NVME_NS_EXT_LBAS) { + dev_err(ns->ctrl->device, + "requires extended LBAs for metadata\n"); + return false; + } + + len_by_nlb = (le16_to_cpu(c->rw.length) + 1) * ns->ms; + if (meta_len < len_by_nlb) { + dev_err(ns->ctrl->device, + "metadata length (%u instad of %u) is too small.\n", + meta_len, len_by_nlb); + return false; + } + + return true; +} + static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, unsigned len, u32 seed) { @@ -104,6 +174,9 @@ static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, void *buf; struct bio *bio = req->bio;
+ if (!nvme_validate_metadata_len(req, len)) + return ERR_PTR(-EINVAL); + buf = kmalloc(len, GFP_KERNEL); if (!buf) goto out; @@ -134,6 +207,41 @@ static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, return ERR_PTR(ret); }
+static bool nvme_validate_buffer_len(struct nvme_ns *ns, struct nvme_command *c, + unsigned meta_len, unsigned data_len) +{ + u32 mlen_by_nlb, dlen_by_nlb; + + /* Do not guard admin */ + if (capable(CAP_SYS_ADMIN)) + return true; + + /* Block commands that do not have nlb in cdw12 */ + if (!nvme_nlb_in_cdw12(ns, c->common.opcode)) { + dev_err(ns->ctrl->device, + "unknown metadata command %c.\n", c->common.opcode); + return false; + } + + /* When PI is inserted or stripped and not transferred.*/ + if (ns->ms == ns->pi_size && + (c->rw.control & cpu_to_le16(NVME_RW_PRINFO_PRACT))) + mlen_by_nlb = 0; + else + mlen_by_nlb = (le16_to_cpu(c->rw.length) + 1) * ns->ms; + + dlen_by_nlb = (le16_to_cpu(c->rw.length) + 1) << ns->lba_shift; + + if (data_len < (dlen_by_nlb + mlen_by_nlb)) { + dev_err(ns->ctrl->device, + "buffer length (%u instad of %u) is too small.\n", + data_len, dlen_by_nlb + mlen_by_nlb); + return false; + } + + return true; +} + static int nvme_finish_user_metadata(struct request *req, void __user *ubuf, void *meta, unsigned len, int ret) { @@ -202,6 +310,14 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, } *metap = meta; } + /* Guard for a short bounce buffer */ + if (bio->bi_private) { + if (!nvme_validate_buffer_len(ns, nvme_req(req)->cmd, + meta_len, bufflen)) { + ret = -EINVAL; + goto out_unmap; + } + }
return ret;