Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests. These devices broke with commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") because we have no way to distinguish between a user decision to set a block_device read-only and the disk being write protected as a result of the hardware state.
To overcome this we add a third state to the gendisk read-only policy. This flag is exlusively used when the user forces a struct block_device read-only via BLKROSET. We currently don't allow switching ro state in sysfs so the ioctl is the only entry point for this new state.
In set_disk_ro() we check whether the user override flag is in effect for a disk before changing read-only state based on the device settings. This means that devices that have a delay before going read-write will now be able to clear the read-only state. And devices where the admin or udev has forced the disk read-only will not cause the gendisk policy to reflect the mode reported by the device.
Cc: Jeremy Cline jeremy@jcline.org Cc: Oleksii Kurochko olkuroch@cisco.com Cc: stable@vger.kernel.org # v4.16+ Reported-by: Oleksii Kurochko olkuroch@cisco.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201221 Fixes: 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") Signed-off-by: Martin K. Petersen martin.petersen@oracle.com
---
I have verified that get_disk_ro() and bdev_read_only() callers all handle the additional value correctly. Same is true for "ro" in sysfs.
Note that per-partition ro settings are lost on revalidate. This has been broken for at least a decade and it will require major surgery to fix. To my knowledge nobody has complained about being unable to make partition read-only settings stick through a revalidate. So hopefully this patch will suffice as a simple fix for stable. --- block/genhd.c | 13 ++++++++++++- block/ioctl.c | 3 ++- drivers/scsi/sd.c | 4 +--- include/linux/genhd.h | 6 ++++++ 4 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c index 1dd8fd6613b8..e29805bfa989 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1549,11 +1549,22 @@ void set_disk_ro(struct gendisk *disk, int flag) struct disk_part_iter piter; struct hd_struct *part;
+ /* + * If the user has forced disk read-only with BLKROSET, ignore + * any device state change requested by the driver. + */ + if (disk->part0.policy == DISK_POLICY_USER_WRITE_PROTECT) + return; if (disk->part0.policy != flag) { set_disk_ro_uevent(disk, flag); disk->part0.policy = flag; } - + /* + * If set_disk_ro() is called from revalidate, all partitions + * have already been dropped at this point and thus any + * per-partition user setting lost. Each partition will + * inherit part0 policy when subsequently re-added. + */ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) part->policy = flag; diff --git a/block/ioctl.c b/block/ioctl.c index 4825c78a6baa..16c42e1b18c8 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -451,7 +451,8 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, return ret; if (get_user(n, (int __user *)arg)) return -EFAULT; - set_device_ro(bdev, n); + set_device_ro(bdev, n ? DISK_POLICY_USER_WRITE_PROTECT : + DISK_POLICY_WRITABLE); return 0; }
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b2da8a00ec33..9aa409b38765 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2591,10 +2591,8 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) int res; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data; - int disk_ro = get_disk_ro(sdkp->disk); int old_wp = sdkp->write_prot;
- set_disk_ro(sdkp->disk, 0); if (sdp->skip_ms_page_3f) { sd_first_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); return; @@ -2632,7 +2630,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) "Test WP failed, assume Write Enabled\n"); } else { sdkp->write_prot = ((data.device_specific & 0x80) != 0); - set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro); + set_disk_ro(sdkp->disk, sdkp->write_prot); if (sdkp->first_scan || old_wp != sdkp->write_prot) { sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", sdkp->write_prot ? "on" : "off"); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 06c0fd594097..2bef434d4dff 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -150,6 +150,12 @@ enum { DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ };
+enum { + DISK_POLICY_WRITABLE = 0, /* Default */ + DISK_POLICY_DEVICE_WRITE_PROTECT = 1, /* Set by device driver */ + DISK_POLICY_USER_WRITE_PROTECT = 2, /* Set via BLKROSET */ +}; + struct disk_part_tbl { struct rcu_head rcu_head; int len;
On 2/8/19 6:38 PM, Martin K. Petersen wrote:
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests. These devices broke with commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") because we have no way to distinguish between a user decision to set a block_device read-only and the disk being write protected as a result of the hardware state.
To overcome this we add a third state to the gendisk read-only policy. This flag is exlusively used when the user forces a struct block_device read-only via BLKROSET. We currently don't allow switching ro state in sysfs so the ioctl is the only entry point for this new state.
In set_disk_ro() we check whether the user override flag is in effect for a disk before changing read-only state based on the device settings. This means that devices that have a delay before going read-write will now be able to clear the read-only state. And devices where the admin or udev has forced the disk read-only will not cause the gendisk policy to reflect the mode reported by the device.
Cc: Jeremy Cline jeremy@jcline.org Cc: Oleksii Kurochko olkuroch@cisco.com Cc: stable@vger.kernel.org # v4.16+ Reported-by: Oleksii Kurochko olkuroch@cisco.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201221 Fixes: 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") Signed-off-by: Martin K. Petersen martin.petersen@oracle.com
I have verified that get_disk_ro() and bdev_read_only() callers all handle the additional value correctly. Same is true for "ro" in sysfs.
Note that per-partition ro settings are lost on revalidate. This has been broken for at least a decade and it will require major surgery to fix. To my knowledge nobody has complained about being unable to make partition read-only settings stick through a revalidate. So hopefully this patch will suffice as a simple fix for stable.
Oof, my apologies for this regression. This looks like nice, tidy way to fix it.
block/genhd.c | 13 ++++++++++++- block/ioctl.c | 3 ++- drivers/scsi/sd.c | 4 +--- include/linux/genhd.h | 6 ++++++ 4 files changed, 21 insertions(+), 5 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c index 1dd8fd6613b8..e29805bfa989 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1549,11 +1549,22 @@ void set_disk_ro(struct gendisk *disk, int flag) struct disk_part_iter piter; struct hd_struct *part;
- /*
* If the user has forced disk read-only with BLKROSET, ignore
* any device state change requested by the driver.
*/
- if (disk->part0.policy == DISK_POLICY_USER_WRITE_PROTECT)
return;
I noticed drivers/s390/block/dasd_ioctl.c calls set_disk_ro() to set the policy, where-as the policy is set with set_device_ro() in the generic ioctl.
It's not setting the policy to DISK_POLICY_USER_WRITE_PROTECT so I think it would only be a problem if the user set it to 2 instead of 1 assuming any truthy value is acceptable. Then the user wouldn't be able to mark the disk as writable again since this would be true. Perhaps it's a somewhat far-fetched scenario.
if (disk->part0.policy != flag) { set_disk_ro_uevent(disk, flag); disk->part0.policy = flag; }
- /*
* If set_disk_ro() is called from revalidate, all partitions
* have already been dropped at this point and thus any
* per-partition user setting lost. Each partition will
* inherit part0 policy when subsequently re-added.
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) part->policy = flag;*/
diff --git a/block/ioctl.c b/block/ioctl.c index 4825c78a6baa..16c42e1b18c8 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -451,7 +451,8 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, return ret; if (get_user(n, (int __user *)arg)) return -EFAULT;
- set_device_ro(bdev, n);
- set_device_ro(bdev, n ? DISK_POLICY_USER_WRITE_PROTECT :
return 0; }DISK_POLICY_WRITABLE);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b2da8a00ec33..9aa409b38765 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2591,10 +2591,8 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) int res; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data;
- int disk_ro = get_disk_ro(sdkp->disk); int old_wp = sdkp->write_prot;
- set_disk_ro(sdkp->disk, 0); if (sdp->skip_ms_page_3f) { sd_first_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); return;
@@ -2632,7 +2630,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) "Test WP failed, assume Write Enabled\n"); } else { sdkp->write_prot = ((data.device_specific & 0x80) != 0);
set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro);
if (sdkp->first_scan || old_wp != sdkp->write_prot) { sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", sdkp->write_prot ? "on" : "off");set_disk_ro(sdkp->disk, sdkp->write_prot);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 06c0fd594097..2bef434d4dff 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -150,6 +150,12 @@ enum { DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ }; +enum {
- DISK_POLICY_WRITABLE = 0, /* Default */
- DISK_POLICY_DEVICE_WRITE_PROTECT = 1, /* Set by device driver */
- DISK_POLICY_USER_WRITE_PROTECT = 2, /* Set via BLKROSET */
+};
- struct disk_part_tbl { struct rcu_head rcu_head; int len;
Jeremy,
I noticed drivers/s390/block/dasd_ioctl.c calls set_disk_ro() to set the policy, where-as the policy is set with set_device_ro() in the generic ioctl.
There's a subtle distinction here:
- set_disk_ro() sets the policy for a whole disk
- set_device_ro() sets the policy for a block_device (typically partition)
It's not setting the policy to DISK_POLICY_USER_WRITE_PROTECT so I think it would only be a problem if the user set it to 2 instead of 1 assuming any truthy value is acceptable. Then the user wouldn't be able to mark the disk as writable again since this would be true. Perhaps it's a somewhat far-fetched scenario.
OK, I missed that particular entry point. Will fix.
On Fri, Feb 08, 2019 at 06:38:31PM -0500, Martin K. Petersen wrote:
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests.
That is really weird. What kind of devices are these?
Note that per-partition ro settings are lost on revalidate. This has been broken for at least a decade and it will require major surgery to fix. To my knowledge nobody has complained about being unable to make partition read-only settings stick through a revalidate. So hopefully this patch will suffice as a simple fix for stable.
Should we warn when we lost these settings on a revalidate?
I have to say I don't like the tristate too much - it seems to allow setting a hardware write protected device writable again by user interfaction, right?
Should we just have a hardware and a user policy field that are separate instead?
On 2/12/19 9:03 AM, Christoph Hellwig wrote:
On Fri, Feb 08, 2019 at 06:38:31PM -0500, Martin K. Petersen wrote:
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests.
That is really weird. What kind of devices are these?
No, not really. Some arrays (HP IIRC) use a similar mechanism for volume copy or snapshots.
Note that per-partition ro settings are lost on revalidate. This has been broken for at least a decade and it will require major surgery to fix. To my knowledge nobody has complained about being unable to make partition read-only settings stick through a revalidate. So hopefully this patch will suffice as a simple fix for stable.
Should we warn when we lost these settings on a revalidate?
I have to say I don't like the tristate too much - it seems to allow setting a hardware write protected device writable again by user interfaction, right?
Should we just have a hardware and a user policy field that are separate instead?
Problem is how the mechanism should work. Thing is, once a device goes read-only we pretty much stop accessing it (as it will send any filesystem down onto the recovery path). And once we stop sending I/O to it we'll lose the ability to figure out that the device switched back to R/W mode.
(Always assuming that we'll be getting a sense code in the first place).
But overall I have to agree with Christoph. Read-only devices and the handling of which is pretty much array-specific, so I doubt we can generalize much here.
Cheers,
Hannes
Hannes,
And once we stop sending I/O to it we'll lose the ability to figure out that the device switched back to R/W mode.
(Always assuming that we'll be getting a sense code in the first place).
FWIW, I did get correct sense on all the drives I tested and verified that the code does the right thing.
But obviously I have my doubts about $RANDOM_USB_GIZMO.
Christoph,
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests.
That is really weird. What kind of devices are these?
There are several. I have many bug reports, ranging from USB sticks to arrays.
Note that per-partition ro settings are lost on revalidate. This has been broken for at least a decade and it will require major surgery to fix. To my knowledge nobody has complained about being unable to make partition read-only settings stick through a revalidate. So hopefully this patch will suffice as a simple fix for stable.
Should we warn when we lost these settings on a revalidate?
Would be nice. But as I wrote, it's going to require major surgery to the gendisk code to handle this particular scenario correctly since we currently do not keep any partition state between revalidate calls.
I have to say I don't like the tristate too much - it seems to allow setting a hardware write protected device writable again by user interfaction, right?
The original policy was that the user policy was ineffective since the device setting always won.
Jeremy's patch allowed the user setting to override the device setting but broke the case where devices subsequently change state at runtime.
My workaround is that the user ioctl ro state, if set, overrides whatever the device reports. And once the user clears the flag, the current device setting will take effect on revalidate.
Should we just have a hardware and a user policy field that are separate instead?
All this needs to be completely rewritten. However, my attempt at fixing it up properly got pretty convoluted and thus unsuitable for stable.
The intent with this patch was merely as a workaround for people stuck with write-protected drives after boot. The tristate wasn't my first choice, but it turned out to be the path of least resistance for a stable fix.
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests. These devices broke with commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") because we had no way to distinguish between a user decision to set a block_device read-only and the actual hardware device being write-protected.
Because partitions are dropped and recreated on revalidate we are unable to persist any user-provided policy in hd_struct. Introduce a bitmap in struct gendisk to track the user configuration. This bitmap is updated when BLKROSET is called on a given disk or partition.
A helper function, get_user_ro(), is provided to determine whether the ioctl has forced read-only state for a given block device. This helper is used by set_disk_ro() and add_partition() to ensure that both existing and newly created partitions will get the correct state.
- If BLKROSET sets a whole disk device read-only, all partitions will now end up in a read-only state.
- If BLKROSET sets a given partition read-only, that partition will remain read-only post revalidate.
- Otherwise both the whole disk device and any partitions will reflect the write protect state of the underlying device.
Cc: Jeremy Cline jeremy@jcline.org Cc: Oleksii Kurochko olkuroch@cisco.com Cc: stable@vger.kernel.org # v4.16+ Reported-by: Oleksii Kurochko olkuroch@cisco.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201221 Fixes: 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") Signed-off-by: Martin K. Petersen martin.petersen@oracle.com
---
v2: - Track user read-only state in a bitmap
- Work around the regression that caused us to drop user preferences on revalidate --- block/genhd.c | 22 +++++++++++++++++----- block/ioctl.c | 4 ++++ block/partition-generic.c | 2 +- drivers/scsi/sd.c | 4 +--- include/linux/genhd.h | 2 ++ 5 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c index 1dd8fd6613b8..34667eb1d3cc 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1544,19 +1544,31 @@ void set_device_ro(struct block_device *bdev, int flag)
EXPORT_SYMBOL(set_device_ro);
+bool get_user_ro(struct gendisk *disk, unsigned int partno) +{ + /* Is the user read-only bit set for the whole disk device? */ + if (test_bit(0, disk->user_ro_bitmap)) + return true; + + /* Is the user read-only bit set for this particular partition? */ + if (test_bit(partno, disk->user_ro_bitmap)) + return true; + + return false; +} +EXPORT_SYMBOL(get_user_ro); + void set_disk_ro(struct gendisk *disk, int flag) { struct disk_part_iter piter; struct hd_struct *part;
- if (disk->part0.policy != flag) { + if (disk->part0.policy != flag) set_disk_ro_uevent(disk, flag); - disk->part0.policy = flag; - }
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0); while ((part = disk_part_iter_next(&piter))) - part->policy = flag; + part->policy = get_user_ro(disk, part->partno) ?: flag; disk_part_iter_exit(&piter); }
diff --git a/block/ioctl.c b/block/ioctl.c index 4825c78a6baa..41206df89485 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -451,6 +451,10 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, return ret; if (get_user(n, (int __user *)arg)) return -EFAULT; + if (n) + set_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap); + else + clear_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap); set_device_ro(bdev, n); return 0; } diff --git a/block/partition-generic.c b/block/partition-generic.c index 8e596a8dff32..c6a3c21c2496 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -338,7 +338,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; - p->policy = get_disk_ro(disk); + p->policy = get_user_ro(disk, partno) ?: get_disk_ro(disk);
if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 67cc439b86e4..5dfe37b08d3b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2591,10 +2591,8 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) int res; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data; - int disk_ro = get_disk_ro(sdkp->disk); int old_wp = sdkp->write_prot;
- set_disk_ro(sdkp->disk, 0); if (sdp->skip_ms_page_3f) { sd_first_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); return; @@ -2632,7 +2630,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) "Test WP failed, assume Write Enabled\n"); } else { sdkp->write_prot = ((data.device_specific & 0x80) != 0); - set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro); + set_disk_ro(sdkp->disk, sdkp->write_prot); if (sdkp->first_scan || old_wp != sdkp->write_prot) { sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", sdkp->write_prot ? "on" : "off"); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 06c0fd594097..9645c2604465 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -194,6 +194,7 @@ struct gendisk { */ struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; + DECLARE_BITMAP(user_ro_bitmap, DISK_MAX_PARTS);
const struct block_device_operations *fops; struct request_queue *queue; @@ -433,6 +434,7 @@ extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); +extern bool get_user_ro(struct gendisk *disk, unsigned int partno);
static inline int get_disk_ro(struct gendisk *disk) {
On 2/13/19 3:57 AM, Martin K. Petersen wrote:
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests. These devices broke with commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") because we had no way to distinguish between a user decision to set a block_device read-only and the actual hardware device being write-protected.
Because partitions are dropped and recreated on revalidate we are unable to persist any user-provided policy in hd_struct. Introduce a bitmap in struct gendisk to track the user configuration. This bitmap is updated when BLKROSET is called on a given disk or partition.
A helper function, get_user_ro(), is provided to determine whether the ioctl has forced read-only state for a given block device. This helper is used by set_disk_ro() and add_partition() to ensure that both existing and newly created partitions will get the correct state.
If BLKROSET sets a whole disk device read-only, all partitions will now end up in a read-only state.
If BLKROSET sets a given partition read-only, that partition will remain read-only post revalidate.
Otherwise both the whole disk device and any partitions will reflect the write protect state of the underlying device.
Cc: Jeremy Cline jeremy@jcline.org Cc: Oleksii Kurochko olkuroch@cisco.com Cc: stable@vger.kernel.org # v4.16+ Reported-by: Oleksii Kurochko olkuroch@cisco.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201221 Fixes: 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") Signed-off-by: Martin K. Petersen martin.petersen@oracle.com
v2:
Track user read-only state in a bitmap
Work around the regression that caused us to drop user preferences on revalidate
block/genhd.c | 22 +++++++++++++++++----- block/ioctl.c | 4 ++++ block/partition-generic.c | 2 +- drivers/scsi/sd.c | 4 +--- include/linux/genhd.h | 2 ++ 5 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c index 1dd8fd6613b8..34667eb1d3cc 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1544,19 +1544,31 @@ void set_device_ro(struct block_device *bdev, int flag) EXPORT_SYMBOL(set_device_ro); +bool get_user_ro(struct gendisk *disk, unsigned int partno) +{
- /* Is the user read-only bit set for the whole disk device? */
- if (test_bit(0, disk->user_ro_bitmap))
return true;
- /* Is the user read-only bit set for this particular partition? */
- if (test_bit(partno, disk->user_ro_bitmap))
return true;
- return false;
+} +EXPORT_SYMBOL(get_user_ro);
- void set_disk_ro(struct gendisk *disk, int flag) { struct disk_part_iter piter; struct hd_struct *part;
- if (disk->part0.policy != flag) {
- if (disk->part0.policy != flag) set_disk_ro_uevent(disk, flag);
disk->part0.policy = flag;
- }
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0); while ((part = disk_part_iter_next(&piter)))
part->policy = flag;
disk_part_iter_exit(&piter); }part->policy = get_user_ro(disk, part->partno) ?: flag;
diff --git a/block/ioctl.c b/block/ioctl.c index 4825c78a6baa..41206df89485 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -451,6 +451,10 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, return ret; if (get_user(n, (int __user *)arg)) return -EFAULT;
- if (n)
set_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap);
- else
set_device_ro(bdev, n); return 0; }clear_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap);
diff --git a/block/partition-generic.c b/block/partition-generic.c index 8e596a8dff32..c6a3c21c2496 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -338,7 +338,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno;
- p->policy = get_disk_ro(disk);
- p->policy = get_user_ro(disk, partno) ?: get_disk_ro(disk);
if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 67cc439b86e4..5dfe37b08d3b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2591,10 +2591,8 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) int res; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data;
- int disk_ro = get_disk_ro(sdkp->disk); int old_wp = sdkp->write_prot;
- set_disk_ro(sdkp->disk, 0); if (sdp->skip_ms_page_3f) { sd_first_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); return;
@@ -2632,7 +2630,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) "Test WP failed, assume Write Enabled\n"); } else { sdkp->write_prot = ((data.device_specific & 0x80) != 0);
set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro);
if (sdkp->first_scan || old_wp != sdkp->write_prot) { sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", sdkp->write_prot ? "on" : "off");set_disk_ro(sdkp->disk, sdkp->write_prot);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 06c0fd594097..9645c2604465 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -194,6 +194,7 @@ struct gendisk { */ struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0;
- DECLARE_BITMAP(user_ro_bitmap, DISK_MAX_PARTS);
const struct block_device_operations *fops; struct request_queue *queue; @@ -433,6 +434,7 @@ extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); +extern bool get_user_ro(struct gendisk *disk, unsigned int partno); static inline int get_disk_ro(struct gendisk *disk) {
Hmm. Can't say I like it. But as we're dropping partitions on revalidate I guess we'll have to do it that way. Oh well.
Reviewed-by: Hannes Reinecke hare@suse.com
Cheers,
Hannes
Christoph? Jens?
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests. These devices broke with commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") because we had no way to distinguish between a user decision to set a block_device read-only and the actual hardware device being write-protected.
Because partitions are dropped and recreated on revalidate we are unable to persist any user-provided policy in hd_struct. Introduce a bitmap in struct gendisk to track the user configuration. This bitmap is updated when BLKROSET is called on a given disk or partition.
A helper function, get_user_ro(), is provided to determine whether the ioctl has forced read-only state for a given block device. This helper is used by set_disk_ro() and add_partition() to ensure that both existing and newly created partitions will get the correct state.
If BLKROSET sets a whole disk device read-only, all partitions will now end up in a read-only state.
If BLKROSET sets a given partition read-only, that partition will remain read-only post revalidate.
Otherwise both the whole disk device and any partitions will reflect the write protect state of the underlying device.
Cc: Jeremy Cline jeremy@jcline.org Cc: Oleksii Kurochko olkuroch@cisco.com Cc: stable@vger.kernel.org # v4.16+ Reported-by: Oleksii Kurochko olkuroch@cisco.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201221 Fixes: 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") Signed-off-by: Martin K. Petersen martin.petersen@oracle.com
v2:
Track user read-only state in a bitmap
Work around the regression that caused us to drop user preferences on revalidate
block/genhd.c | 22 +++++++++++++++++----- block/ioctl.c | 4 ++++ block/partition-generic.c | 2 +- drivers/scsi/sd.c | 4 +--- include/linux/genhd.h | 2 ++ 5 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c index 1dd8fd6613b8..34667eb1d3cc 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1544,19 +1544,31 @@ void set_device_ro(struct block_device *bdev, int flag) EXPORT_SYMBOL(set_device_ro); +bool get_user_ro(struct gendisk *disk, unsigned int partno) +{
- /* Is the user read-only bit set for the whole disk device? */
- if (test_bit(0, disk->user_ro_bitmap))
return true;
- /* Is the user read-only bit set for this particular partition? */
- if (test_bit(partno, disk->user_ro_bitmap))
return true;
- return false;
+} +EXPORT_SYMBOL(get_user_ro);
void set_disk_ro(struct gendisk *disk, int flag) { struct disk_part_iter piter; struct hd_struct *part;
- if (disk->part0.policy != flag) {
- if (disk->part0.policy != flag) set_disk_ro_uevent(disk, flag);
disk->part0.policy = flag;
- }
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0); while ((part = disk_part_iter_next(&piter)))
part->policy = flag;
disk_part_iter_exit(&piter);part->policy = get_user_ro(disk, part->partno) ?: flag;
} diff --git a/block/ioctl.c b/block/ioctl.c index 4825c78a6baa..41206df89485 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -451,6 +451,10 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, return ret; if (get_user(n, (int __user *)arg)) return -EFAULT;
- if (n)
set_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap);
- else
set_device_ro(bdev, n); return 0;clear_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap);
} diff --git a/block/partition-generic.c b/block/partition-generic.c index 8e596a8dff32..c6a3c21c2496 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -338,7 +338,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno;
- p->policy = get_disk_ro(disk);
- p->policy = get_user_ro(disk, partno) ?: get_disk_ro(disk);
if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 67cc439b86e4..5dfe37b08d3b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2591,10 +2591,8 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) int res; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data;
- int disk_ro = get_disk_ro(sdkp->disk); int old_wp = sdkp->write_prot;
- set_disk_ro(sdkp->disk, 0); if (sdp->skip_ms_page_3f) { sd_first_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); return;
@@ -2632,7 +2630,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) "Test WP failed, assume Write Enabled\n"); } else { sdkp->write_prot = ((data.device_specific & 0x80) != 0);
set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro);
if (sdkp->first_scan || old_wp != sdkp->write_prot) { sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", sdkp->write_prot ? "on" : "off");set_disk_ro(sdkp->disk, sdkp->write_prot);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 06c0fd594097..9645c2604465 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -194,6 +194,7 @@ struct gendisk { */ struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0;
- DECLARE_BITMAP(user_ro_bitmap, DISK_MAX_PARTS);
const struct block_device_operations *fops; struct request_queue *queue; @@ -433,6 +434,7 @@ extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); +extern bool get_user_ro(struct gendisk *disk, unsigned int partno); static inline int get_disk_ro(struct gendisk *disk) {
On Wed, Feb 13, 2019 at 5:01 PM Martin K. Petersen martin.petersen@oracle.com wrote:
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests. These devices broke with commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") because we had no way to distinguish between a user decision to set a block_device read-only and the actual hardware device being write-protected.
Because partitions are dropped and recreated on revalidate we are unable to persist any user-provided policy in hd_struct. Introduce a bitmap in struct gendisk to track the user configuration. This bitmap is updated when BLKROSET is called on a given disk or partition.
A helper function, get_user_ro(), is provided to determine whether the ioctl has forced read-only state for a given block device. This helper is used by set_disk_ro() and add_partition() to ensure that both existing and newly created partitions will get the correct state.
Hi Martin & Oleksii,
From the Bugzilla, looks it is only reported on the "Kingston DT Ultimate G3"
USB flash drive.
If it is true, this particular issue might be addressed simply by applying one quirk on this drive, such as, by adding one delay before calling sd_spinup_disk() in the 1st sd_revalidate_disk() to wait until it is ready to process I/O requests.
Otherwise if there are many such devices, I think your approach is good.
Thanks, Ming Lei
Hi Ming,
From the Bugzilla, looks it is only reported on the "Kingston DT Ultimate G3" USB flash drive.
There are several bug reports as a result of the change. It's not at all uncommon for a device to switch at runtime. Before Jeremy's change we just didn't see it because the device setting always took precedence and voided any user setting.
On Tue, Feb 12, 2019 at 09:57:17PM -0500, Martin K. Petersen wrote:
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests. These devices broke with commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") because we had no way to distinguish between a user decision to set a block_device read-only and the actual hardware device being write-protected.
Because partitions are dropped and recreated on revalidate we are unable to persist any user-provided policy in hd_struct. Introduce a bitmap in struct gendisk to track the user configuration. This bitmap is updated when BLKROSET is called on a given disk or partition.
A helper function, get_user_ro(), is provided to determine whether the ioctl has forced read-only state for a given block device. This helper is used by set_disk_ro() and add_partition() to ensure that both existing and newly created partitions will get the correct state.
If BLKROSET sets a whole disk device read-only, all partitions will now end up in a read-only state.
If BLKROSET sets a given partition read-only, that partition will remain read-only post revalidate.
Otherwise both the whole disk device and any partitions will reflect the write protect state of the underlying device.
Cc: Jeremy Cline jeremy@jcline.org Cc: Oleksii Kurochko olkuroch@cisco.com Cc: stable@vger.kernel.org # v4.16+ Reported-by: Oleksii Kurochko olkuroch@cisco.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201221 Fixes: 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") Signed-off-by: Martin K. Petersen martin.petersen@oracle.com
v2:
Track user read-only state in a bitmap
Work around the regression that caused us to drop user preferences on revalidate
block/genhd.c | 22 +++++++++++++++++----- block/ioctl.c | 4 ++++ block/partition-generic.c | 2 +- drivers/scsi/sd.c | 4 +--- include/linux/genhd.h | 2 ++ 5 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c index 1dd8fd6613b8..34667eb1d3cc 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1544,19 +1544,31 @@ void set_device_ro(struct block_device *bdev, int flag) EXPORT_SYMBOL(set_device_ro); +bool get_user_ro(struct gendisk *disk, unsigned int partno) +{
- /* Is the user read-only bit set for the whole disk device? */
- if (test_bit(0, disk->user_ro_bitmap))
return true;
- /* Is the user read-only bit set for this particular partition? */
- if (test_bit(partno, disk->user_ro_bitmap))
return true;
- return false;
+} +EXPORT_SYMBOL(get_user_ro);
No need to export this function.
- p->policy = get_user_ro(disk, partno) ?: get_disk_ro(disk);
Can we avoid the obsfucating non-standard (GNU extension) use of ?: here? Just use a local variable and a good old if.
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests. These devices broke with commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") because we had no way to distinguish between a user decision to set a block_device read-only and the actual hardware device being write-protected.
Because partitions are dropped and recreated on revalidate we are unable to persist any user-provided policy in hd_struct. Introduce a bitmap in struct gendisk to track the user configuration. This bitmap is updated when BLKROSET is called on a given disk or partition.
A helper function, get_user_ro(), is provided to determine whether the ioctl has forced read-only state for a given block device. This helper is used by set_disk_ro() and add_partition() to ensure that both existing and newly created partitions will get the correct state.
- If BLKROSET sets a whole disk device read-only, all partitions will now end up in a read-only state.
- If BLKROSET sets a given partition read-only, that partition will remain read-only post revalidate.
- Otherwise both the whole disk device and any partitions will reflect the write protect state of the underlying device.
Since nobody knows what "policy" means, rename the field to "read_only" for clarity.
Cc: Jeremy Cline jeremy@jcline.org Cc: Oleksii Kurochko olkuroch@cisco.com Cc: stable@vger.kernel.org # v4.16+ Reported-by: Oleksii Kurochko olkuroch@cisco.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201221 Fixes: 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") Signed-off-by: Martin K. Petersen martin.petersen@oracle.com
---
v3: - Drop ?: since gcc complains about mixing int and bool (zeroday) - Drop EXPORT_SYMBOL (hch) - s/policy/read_only/ and make it a boolean
v2: - Track user read-only state in a bitmap
- Work around the regression that caused us to drop user preferences on revalidate --- block/blk-core.c | 2 +- block/genhd.c | 34 ++++++++++++++++++++++++---------- block/ioctl.c | 4 ++++ block/partition-generic.c | 7 +++++-- drivers/scsi/sd.c | 4 +--- include/linux/genhd.h | 11 +++++++---- 6 files changed, 42 insertions(+), 20 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c index c78042975737..c28d7ee3a05e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -790,7 +790,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) { const int op = bio_op(bio);
- if (part->policy && op_is_write(op)) { + if (part->read_only && op_is_write(op)) { char b[BDEVNAME_SIZE];
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) diff --git a/block/genhd.c b/block/genhd.c index 1dd8fd6613b8..c07037c0b03f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1537,26 +1537,40 @@ static void set_disk_ro_uevent(struct gendisk *gd, int ro) kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); }
-void set_device_ro(struct block_device *bdev, int flag) +void set_device_ro(struct block_device *bdev, bool state) { - bdev->bd_part->policy = flag; + bdev->bd_part->read_only = state; }
EXPORT_SYMBOL(set_device_ro);
-void set_disk_ro(struct gendisk *disk, int flag) +bool get_user_ro(struct gendisk *disk, unsigned int partno) +{ + /* Is the user read-only bit set for the whole disk device? */ + if (test_bit(0, disk->user_ro_bitmap)) + return true; + + /* Is the user read-only bit set for this particular partition? */ + if (test_bit(partno, disk->user_ro_bitmap)) + return true; + + return false; +} + +void set_disk_ro(struct gendisk *disk, bool state) { struct disk_part_iter piter; struct hd_struct *part;
- if (disk->part0.policy != flag) { - set_disk_ro_uevent(disk, flag); - disk->part0.policy = flag; - } + if (disk->part0.read_only != state) + set_disk_ro_uevent(disk, state);
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0); while ((part = disk_part_iter_next(&piter))) - part->policy = flag; + if (get_user_ro(disk, part->partno)) + part->read_only = true; + else + part->read_only = state; disk_part_iter_exit(&piter); }
@@ -1566,7 +1580,7 @@ int bdev_read_only(struct block_device *bdev) { if (!bdev) return 0; - return bdev->bd_part->policy; + return bdev->bd_part->read_only; }
EXPORT_SYMBOL(bdev_read_only); diff --git a/block/ioctl.c b/block/ioctl.c index 4825c78a6baa..41206df89485 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -451,6 +451,10 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, return ret; if (get_user(n, (int __user *)arg)) return -EFAULT; + if (n) + set_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap); + else + clear_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap); set_device_ro(bdev, n); return 0; } diff --git a/block/partition-generic.c b/block/partition-generic.c index 8e596a8dff32..2bade849cc5c 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -98,7 +98,7 @@ static ssize_t part_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%d\n", p->policy ? 1 : 0); + return sprintf(buf, "%u\n", p->read_only ? 1 : 0); }
static ssize_t part_alignment_offset_show(struct device *dev, @@ -338,7 +338,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; - p->policy = get_disk_ro(disk); + if (get_user_ro(disk, partno)) + p->read_only = true; + else + p->read_only = get_disk_ro(disk);
if (info) { struct partition_meta_info *pinfo = alloc_part_info(disk); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 67cc439b86e4..5dfe37b08d3b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2591,10 +2591,8 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) int res; struct scsi_device *sdp = sdkp->device; struct scsi_mode_data data; - int disk_ro = get_disk_ro(sdkp->disk); int old_wp = sdkp->write_prot;
- set_disk_ro(sdkp->disk, 0); if (sdp->skip_ms_page_3f) { sd_first_printk(KERN_NOTICE, sdkp, "Assuming Write Enabled\n"); return; @@ -2632,7 +2630,7 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, unsigned char *buffer) "Test WP failed, assume Write Enabled\n"); } else { sdkp->write_prot = ((data.device_specific & 0x80) != 0); - set_disk_ro(sdkp->disk, sdkp->write_prot || disk_ro); + set_disk_ro(sdkp->disk, sdkp->write_prot); if (sdkp->first_scan || old_wp != sdkp->write_prot) { sd_printk(KERN_NOTICE, sdkp, "Write Protect is %s\n", sdkp->write_prot ? "on" : "off"); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 06c0fd594097..1abde0e88ccb 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -118,7 +118,8 @@ struct hd_struct { unsigned int discard_alignment; struct device __dev; struct kobject *holder_dir; - int policy, partno; + bool read_only; + int partno; struct partition_meta_info *info; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; @@ -194,6 +195,7 @@ struct gendisk { */ struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; + DECLARE_BITMAP(user_ro_bitmap, DISK_MAX_PARTS);
const struct block_device_operations *fops; struct request_queue *queue; @@ -431,12 +433,13 @@ extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
-extern void set_device_ro(struct block_device *bdev, int flag); -extern void set_disk_ro(struct gendisk *disk, int flag); +extern void set_device_ro(struct block_device *bdev, bool state); +extern void set_disk_ro(struct gendisk *disk, bool state); +extern bool get_user_ro(struct gendisk *disk, unsigned int partno);
static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0.policy; + return disk->part0.read_only; }
extern void disk_block_events(struct gendisk *disk);
linux-stable-mirror@lists.linaro.org