Passthrough has got a hole that can be exploited to cause kernel memory corruption. This is about making the device do larger DMA into short meta/data buffer owned by kernel [1].
As a stopgap measure, disable the support of unprivileged passthrough.
This patch brings back coarse-granular CAP_SYS_ADMIN checks by reverting following patches:
- 7d9d7d59d44 ("nvme: replace the fmode_t argument to the nvme ioctl handlers with a simple bool") - 313c08c72ee ("nvme: don't allow unprivileged passthrough on partitions") - 6f99ac04c46 ("nvme: consult the CSE log page for unprivileged passthrough") - ea43fceea41 ("nvme: allow unprivileged passthrough of Identify Controller") - e4fbcf32c86 ("nvme: identify-namespace without CAP_SYS_ADMIN") - 855b7717f44 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
[1] https://lore.kernel.org/linux-nvme/20231013051458.39987-1-joshi.k@samsung.co...
CC: stable@vger.kernel.org # 6.2 Fixes: 855b7717f44b1 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
Suggested-by: Christoph Hellwig hch@lst.de Signed-off-by: Kanchan Joshi joshi.k@samsung.com Reviewed-by: Christoph Hellwig hch@lst.de --- Changes since v1: - Fix the way "Fixes:" was written before (Greg)
drivers/nvme/host/ioctl.c | 159 ++++++++------------------------------ include/linux/nvme.h | 2 - 2 files changed, 34 insertions(+), 127 deletions(-)
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index d8ff796fd5f2..788b36e7915a 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -10,80 +10,8 @@
enum { NVME_IOCTL_VEC = (1 << 0), - NVME_IOCTL_PARTITION = (1 << 1), };
-static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c, - unsigned int flags, bool open_for_write) -{ - u32 effects; - - if (capable(CAP_SYS_ADMIN)) - return true; - - /* - * Do not allow unprivileged passthrough on partitions, as that allows an - * escape from the containment of the partition. - */ - if (flags & NVME_IOCTL_PARTITION) - return false; - - /* - * Do not allow unprivileged processes to send vendor specific or fabrics - * commands as we can't be sure about their effects. - */ - if (c->common.opcode >= nvme_cmd_vendor_start || - c->common.opcode == nvme_fabrics_command) - return false; - - /* - * Do not allow unprivileged passthrough of admin commands except - * for a subset of identify commands that contain information required - * to form proper I/O commands in userspace and do not expose any - * potentially sensitive information. - */ - if (!ns) { - if (c->common.opcode == nvme_admin_identify) { - switch (c->identify.cns) { - case NVME_ID_CNS_NS: - case NVME_ID_CNS_CS_NS: - case NVME_ID_CNS_NS_CS_INDEP: - case NVME_ID_CNS_CS_CTRL: - case NVME_ID_CNS_CTRL: - return true; - } - } - return false; - } - - /* - * Check if the controller provides a Commands Supported and Effects log - * and marks this command as supported. If not reject unprivileged - * passthrough. - */ - effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); - if (!(effects & NVME_CMD_EFFECTS_CSUPP)) - return false; - - /* - * Don't allow passthrough for command that have intrusive (or unknown) - * effects. - */ - if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | - NVME_CMD_EFFECTS_UUID_SEL | - NVME_CMD_EFFECTS_SCOPE_MASK)) - return false; - - /* - * Only allow I/O commands that transfer data to the controller or that - * change the logical block contents if the file descriptor is open for - * writing. - */ - if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) - return open_for_write; - return true; -} - /* * Convert integer values from ioctl structures to user pointers, silently * ignoring the upper bits in the compat case to match behaviour of 32-bit @@ -335,8 +263,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, }
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd __user *ucmd, unsigned int flags, - bool open_for_write) + struct nvme_passthru_cmd __user *ucmd) { struct nvme_passthru_cmd cmd; struct nvme_command c; @@ -344,6 +271,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u64 result; int status;
+ if (!capable(CAP_SYS_ADMIN)) + return -EACCES; if (copy_from_user(&cmd, ucmd, sizeof(cmd))) return -EFAULT; if (cmd.flags) @@ -364,9 +293,6 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15);
- if (!nvme_cmd_allowed(ns, &c, 0, open_for_write)) - return -EACCES; - if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms);
@@ -383,14 +309,16 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, }
static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - struct nvme_passthru_cmd64 __user *ucmd, unsigned int flags, - bool open_for_write) + struct nvme_passthru_cmd64 __user *ucmd, + unsigned int flags) { struct nvme_passthru_cmd64 cmd; struct nvme_command c; unsigned timeout = 0; int status;
+ if (!capable(CAP_SYS_ADMIN)) + return -EACCES; if (copy_from_user(&cmd, ucmd, sizeof(cmd))) return -EFAULT; if (cmd.flags) @@ -411,9 +339,6 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(cmd.cdw14); c.common.cdw15 = cpu_to_le32(cmd.cdw15);
- if (!nvme_cmd_allowed(ns, &c, flags, open_for_write)) - return -EACCES; - if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms);
@@ -563,6 +488,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, void *meta = NULL; int ret;
+ if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + c.common.opcode = READ_ONCE(cmd->opcode); c.common.flags = READ_ONCE(cmd->flags); if (c.common.flags) @@ -584,9 +512,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14)); c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
- if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode & FMODE_WRITE)) - return -EACCES; - d.metadata = READ_ONCE(cmd->metadata); d.addr = READ_ONCE(cmd->addr); d.data_len = READ_ONCE(cmd->data_len); @@ -643,13 +568,13 @@ static bool is_ctrl_ioctl(unsigned int cmd) }
static int nvme_ctrl_ioctl(struct nvme_ctrl *ctrl, unsigned int cmd, - void __user *argp, bool open_for_write) + void __user *argp) { switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); + return nvme_user_cmd(ctrl, NULL, argp); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); + return nvme_user_cmd64(ctrl, NULL, argp, 0); default: return sed_ioctl(ctrl->opal_dev, cmd, argp); } @@ -674,14 +599,16 @@ struct nvme_user_io32 { #endif /* COMPAT_FOR_U64_ALIGNMENT */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp, unsigned int flags, bool open_for_write) + void __user *argp) { + unsigned int flags = 0; + switch (cmd) { case NVME_IOCTL_ID: force_successful_syscall_return(); return ns->head->ns_id; case NVME_IOCTL_IO_CMD: - return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write); + return nvme_user_cmd(ns->ctrl, ns, argp); /* * struct nvme_user_io can have different padding on some 32-bit ABIs. * Just accept the compat version as all fields that are used are the @@ -696,39 +623,32 @@ static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd, flags |= NVME_IOCTL_VEC; fallthrough; case NVME_IOCTL_IO64_CMD: - return nvme_user_cmd64(ns->ctrl, ns, argp, flags, - open_for_write); + return nvme_user_cmd64(ns->ctrl, ns, argp, flags); default: return -ENOTTY; } }
-int nvme_ioctl(struct block_device *bdev, blk_mode_t mode, +int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct nvme_ns *ns = bdev->bd_disk->private_data; - bool open_for_write = mode & BLK_OPEN_WRITE; void __user *argp = (void __user *)arg; - unsigned int flags = 0; - - if (bdev_is_partition(bdev)) - flags |= NVME_IOCTL_PARTITION;
if (is_ctrl_ioctl(cmd)) - return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); - return nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); + return nvme_ctrl_ioctl(ns->ctrl, cmd, argp); + return nvme_ns_ioctl(ns, cmd, argp); }
long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct nvme_ns *ns = container_of(file_inode(file)->i_cdev, struct nvme_ns, cdev); - bool open_for_write = file->f_mode & FMODE_WRITE; void __user *argp = (void __user *)arg;
if (is_ctrl_ioctl(cmd)) - return nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); - return nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); + return nvme_ctrl_ioctl(ns->ctrl, cmd, argp); + return nvme_ns_ioctl(ns, cmd, argp); }
static int nvme_uring_cmd_checks(unsigned int issue_flags) @@ -792,8 +712,7 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd, } #ifdef CONFIG_NVME_MULTIPATH static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd, - void __user *argp, struct nvme_ns_head *head, int srcu_idx, - bool open_for_write) + void __user *argp, struct nvme_ns_head *head, int srcu_idx) __releases(&head->srcu) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -801,7 +720,7 @@ static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
nvme_get_ctrl(ns->ctrl); srcu_read_unlock(&head->srcu, srcu_idx); - ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp, open_for_write); + ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
nvme_put_ctrl(ctrl); return ret; @@ -811,14 +730,9 @@ int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct nvme_ns_head *head = bdev->bd_disk->private_data; - bool open_for_write = mode & BLK_OPEN_WRITE; void __user *argp = (void __user *)arg; struct nvme_ns *ns; int srcu_idx, ret = -EWOULDBLOCK; - unsigned int flags = 0; - - if (bdev_is_partition(bdev)) - flags |= NVME_IOCTL_PARTITION;
srcu_idx = srcu_read_lock(&head->srcu); ns = nvme_find_path(head); @@ -831,10 +745,9 @@ int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, * deadlock when deleting namespaces using the passthrough interface. */ if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, - open_for_write); + return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
- ret = nvme_ns_ioctl(ns, cmd, argp, flags, open_for_write); + ret = nvme_ns_ioctl(ns, cmd, argp); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -843,7 +756,6 @@ int nvme_ns_head_ioctl(struct block_device *bdev, blk_mode_t mode, long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - bool open_for_write = file->f_mode & FMODE_WRITE; struct cdev *cdev = file_inode(file)->i_cdev; struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev); @@ -857,10 +769,9 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd, goto out_unlock;
if (is_ctrl_ioctl(cmd)) - return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx, - open_for_write); + return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
- ret = nvme_ns_ioctl(ns, cmd, argp, 0, open_for_write); + ret = nvme_ns_ioctl(ns, cmd, argp); out_unlock: srcu_read_unlock(&head->srcu, srcu_idx); return ret; @@ -909,8 +820,7 @@ int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags) return ret; }
-static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, - bool open_for_write) +static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) { struct nvme_ns *ns; int ret; @@ -934,7 +844,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, kref_get(&ns->kref); up_read(&ctrl->namespaces_rwsem);
- ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write); + ret = nvme_user_cmd(ctrl, ns, argp); nvme_put_ns(ns); return ret;
@@ -946,17 +856,16 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp, long nvme_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - bool open_for_write = file->f_mode & FMODE_WRITE; struct nvme_ctrl *ctrl = file->private_data; void __user *argp = (void __user *)arg;
switch (cmd) { case NVME_IOCTL_ADMIN_CMD: - return nvme_user_cmd(ctrl, NULL, argp, 0, open_for_write); + return nvme_user_cmd(ctrl, NULL, argp); case NVME_IOCTL_ADMIN64_CMD: - return nvme_user_cmd64(ctrl, NULL, argp, 0, open_for_write); + return nvme_user_cmd64(ctrl, NULL, argp, 0); case NVME_IOCTL_IO_CMD: - return nvme_dev_user_cmd(ctrl, argp, open_for_write); + return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 26dd3f859d9d..df3e2c619448 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -642,7 +642,6 @@ enum { NVME_CMD_EFFECTS_CCC = 1 << 4, NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16), NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, - NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20), };
struct nvme_effects_log { @@ -834,7 +833,6 @@ enum nvme_opcode { nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_recv = 0x7a, nvme_cmd_zone_append = 0x7d, - nvme_cmd_vendor_start = 0x80, };
#define nvme_opcode_name(opcode) { opcode, #opcode }
On Mon, Oct 16, 2023 at 11:35:19AM +0530, Kanchan Joshi wrote:
Passthrough has got a hole that can be exploited to cause kernel memory corruption. This is about making the device do larger DMA into short meta/data buffer owned by kernel [1].
As a stopgap measure, disable the support of unprivileged passthrough.
This patch brings back coarse-granular CAP_SYS_ADMIN checks by reverting following patches:
- 7d9d7d59d44 ("nvme: replace the fmode_t argument to the nvme ioctl handlers with a simple bool")
- 313c08c72ee ("nvme: don't allow unprivileged passthrough on partitions")
- 6f99ac04c46 ("nvme: consult the CSE log page for unprivileged passthrough")
- ea43fceea41 ("nvme: allow unprivileged passthrough of Identify Controller")
- e4fbcf32c86 ("nvme: identify-namespace without CAP_SYS_ADMIN")
- 855b7717f44 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
[1] https://lore.kernel.org/linux-nvme/20231013051458.39987-1-joshi.k@samsung.co...
CC: stable@vger.kernel.org # 6.2 Fixes: 855b7717f44b1 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
Suggested-by: Christoph Hellwig hch@lst.de Signed-off-by: Kanchan Joshi joshi.k@samsung.com Reviewed-by: Christoph Hellwig hch@lst.de
Applied for nvme-6.6.
On further consideration and some offline chats, I believe this large change is a bit too late for 6.6. I think this should wait for 6.7 (and stable), hopefully preserving non-root access in some sane capacity. It's backed out now, and current nvme-6.6 PR does not include this patch.
On Wed, Oct 18, 2023 at 03:26:20PM -0600, Keith Busch wrote:
On further consideration and some offline chats, I believe this large change is a bit too late for 6.6. I think this should wait for 6.7 (and stable), hopefully preserving non-root access in some sane capacity. It's backed out now, and current nvme-6.6 PR does not include this patch.
Umm, what are the offlist discussions? We leave an exploitable hole in, so I don't think waiting any longer is an option.
On Thu, Oct 19, 2023 at 07:04:11AM +0200, Christoph Hellwig wrote:
On Wed, Oct 18, 2023 at 03:26:20PM -0600, Keith Busch wrote:
On further consideration and some offline chats, I believe this large change is a bit too late for 6.6. I think this should wait for 6.7 (and stable), hopefully preserving non-root access in some sane capacity. It's backed out now, and current nvme-6.6 PR does not include this patch.
Umm, what are the offlist discussions? We leave an exploitable hole in, so I don't think waiting any longer is an option.
Jens repeated what he told me offline on this thread here, and dropped the pull request that contained this patch:
https://lists.infradead.org/pipermail/linux-nvme/2023-October/042684.html
BTW, don't you still need someone with root access to change the permissions on the device handle in order for an unpriveledged user to reach this hole? It's not open access by default, you still have to opt-in.
On Fri, Oct 20, 2023 at 08:25:49AM -0600, Keith Busch wrote:
Jens repeated what he told me offline on this thread here, and dropped the pull request that contained this patch:
https://lists.infradead.org/pipermail/linux-nvme/2023-October/042684.html
BTW, don't you still need someone with root access to change the permissions on the device handle in order for an unpriveledged user to reach this hole? It's not open access by default, you still have to opt-in.
Yes, you need someone with root access to change the device node persmissions. But we allowed that under the assumption it is safe to do so, which it turns out it is not.
On Mon, Oct 23, 2023 at 07:44:56AM +0200, Christoph Hellwig wrote:
Yes, you need someone with root access to change the device node persmissions. But we allowed that under the assumption it is safe to do so, which it turns out it is not.
Okay, iiuc, while we have to opt-in to allow this hole, we need another option for users to set to allow this usage because it's not safe.
Here are two options I have considered for unpriveledged access, please let me know if you have others or thoughts.
Restrict access for processes with CAP_SYS_RAWIO, which can be granted to non-root users. This cap is already used in scsi subsystem, too.
A per nvme-generic namespace sysfs attribute that only root can toggle that would override any caps and just rely on access permissions.
On Mon, Oct 23, 2023 at 09:18:36AM -0600, Keith Busch wrote:
On Mon, Oct 23, 2023 at 07:44:56AM +0200, Christoph Hellwig wrote:
Yes, you need someone with root access to change the device node persmissions. But we allowed that under the assumption it is safe to do so, which it turns out it is not.
Okay, iiuc, while we have to opt-in to allow this hole, we need another option for users to set to allow this usage because it's not safe.
Here are two options I have considered for unpriveledged access, please let me know if you have others or thoughts.
Restrict access for processes with CAP_SYS_RAWIO, which can be granted to non-root users. This cap is already used in scsi subsystem, too.
Well, that's sensible in general.
A per nvme-generic namespace sysfs attribute that only root can toggle that would override any caps and just rely on access permissions.
And that I'm not confident about as long as we can only use the broken PRP scheme on NVMe.
On 10/24/2023 12:37 PM, Christoph Hellwig wrote:
On Mon, Oct 23, 2023 at 09:18:36AM -0600, Keith Busch wrote:
On Mon, Oct 23, 2023 at 07:44:56AM +0200, Christoph Hellwig wrote:
Yes, you need someone with root access to change the device node persmissions. But we allowed that under the assumption it is safe to do so, which it turns out it is not.
Okay, iiuc, while we have to opt-in to allow this hole, we need another option for users to set to allow this usage because it's not safe.
Here are two options I have considered for unpriveledged access, please let me know if you have others or thoughts.
Restrict access for processes with CAP_SYS_RAWIO, which can be granted to non-root users. This cap is already used in scsi subsystem, too.
Well, that's sensible in general.
With that someone needs to make each binary (that wants to use passthrough) capability-aware by doing:
setcap "CAP_SYS_RAWIO=ep" <binary>
Seems extra work for admins (or distros if they need to ship the binary that way).
On Thu, Oct 26, 2023 at 08:01:36PM +0530, Kanchan Joshi wrote:
On 10/24/2023 12:37 PM, Christoph Hellwig wrote:
On Mon, Oct 23, 2023 at 09:18:36AM -0600, Keith Busch wrote:
On Mon, Oct 23, 2023 at 07:44:56AM +0200, Christoph Hellwig wrote:
Yes, you need someone with root access to change the device node persmissions. But we allowed that under the assumption it is safe to do so, which it turns out it is not.
Okay, iiuc, while we have to opt-in to allow this hole, we need another option for users to set to allow this usage because it's not safe.
Here are two options I have considered for unpriveledged access, please let me know if you have others or thoughts.
Restrict access for processes with CAP_SYS_RAWIO, which can be granted to non-root users. This cap is already used in scsi subsystem, too.
Well, that's sensible in general.
With that someone needs to make each binary (that wants to use passthrough) capability-aware by doing:
setcap "CAP_SYS_RAWIO=ep" <binary>
Seems extra work for admins (or distros if they need to ship the binary that way).
The way I usually see it done is add the capability to the user so any binary executed by that user already has the caps.
On Oct 16, 2023 / 11:35, Kanchan Joshi wrote:
Passthrough has got a hole that can be exploited to cause kernel memory corruption. This is about making the device do larger DMA into short meta/data buffer owned by kernel [1].
As a stopgap measure, disable the support of unprivileged passthrough.
This patch brings back coarse-granular CAP_SYS_ADMIN checks by reverting following patches:
- 7d9d7d59d44 ("nvme: replace the fmode_t argument to the nvme ioctl handlers with a simple bool")
- 313c08c72ee ("nvme: don't allow unprivileged passthrough on partitions")
- 6f99ac04c46 ("nvme: consult the CSE log page for unprivileged passthrough")
- ea43fceea41 ("nvme: allow unprivileged passthrough of Identify Controller")
- e4fbcf32c86 ("nvme: identify-namespace without CAP_SYS_ADMIN")
- 855b7717f44 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
[1] https://lore.kernel.org/linux-nvme/20231013051458.39987-1-joshi.k@samsung.co...
This change looks affecting the blktests test case nvme/046. Should we adjust the test case for the coarse-granular CAP_SYS_ADMIN checks?
On 10/27/2023 12:36 PM, Shinichiro Kawasaki wrote:
On Oct 16, 2023 / 11:35, Kanchan Joshi wrote:
Passthrough has got a hole that can be exploited to cause kernel memory corruption. This is about making the device do larger DMA into short meta/data buffer owned by kernel [1].
As a stopgap measure, disable the support of unprivileged passthrough.
This patch brings back coarse-granular CAP_SYS_ADMIN checks by reverting following patches:
- 7d9d7d59d44 ("nvme: replace the fmode_t argument to the nvme ioctl handlers with a simple bool")
- 313c08c72ee ("nvme: don't allow unprivileged passthrough on partitions")
- 6f99ac04c46 ("nvme: consult the CSE log page for unprivileged passthrough")
- ea43fceea41 ("nvme: allow unprivileged passthrough of Identify Controller")
- e4fbcf32c86 ("nvme: identify-namespace without CAP_SYS_ADMIN")
- 855b7717f44 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
[1] https://lore.kernel.org/linux-nvme/20231013051458.39987-1-joshi.k@samsung.co...
This change looks affecting the blktests test case nvme/046. Should we adjust the test case for the coarse-granular CAP_SYS_ADMIN checks?
Nothing to adjust in the test, as there is no change in the kernel (at this point). I have made a note to revisit the test if anything changes.
On Oct 27, 2023 / 12:45, Kanchan Joshi wrote:
On 10/27/2023 12:36 PM, Shinichiro Kawasaki wrote:
On Oct 16, 2023 / 11:35, Kanchan Joshi wrote:
Passthrough has got a hole that can be exploited to cause kernel memory corruption. This is about making the device do larger DMA into short meta/data buffer owned by kernel [1].
As a stopgap measure, disable the support of unprivileged passthrough.
This patch brings back coarse-granular CAP_SYS_ADMIN checks by reverting following patches:
- 7d9d7d59d44 ("nvme: replace the fmode_t argument to the nvme ioctl handlers with a simple bool")
- 313c08c72ee ("nvme: don't allow unprivileged passthrough on partitions")
- 6f99ac04c46 ("nvme: consult the CSE log page for unprivileged passthrough")
- ea43fceea41 ("nvme: allow unprivileged passthrough of Identify Controller")
- e4fbcf32c86 ("nvme: identify-namespace without CAP_SYS_ADMIN")
- 855b7717f44 ("nvme: fine-granular CAP_SYS_ADMIN for nvme io commands")
[1] https://lore.kernel.org/linux-nvme/20231013051458.39987-1-joshi.k@samsung.co...
This change looks affecting the blktests test case nvme/046. Should we adjust the test case for the coarse-granular CAP_SYS_ADMIN checks?
Nothing to adjust in the test, as there is no change in the kernel (at this point). I have made a note to revisit the test if anything changes.
Alright, thanks for the care.
linux-stable-mirror@lists.linaro.org