- Linux-stable-mirror - lists.linaro.org

by Elizabeth White

I HAVE AN IMPORTANT MESSAGE TO DISCUSS WITH YOU, PLEASE WRITE BACK TO ME FOR DETAILS. REGARDS, ELIZABETH WHITE -----Sent from IPad

7 years, 8 months

1
0
0 0

[PATCH 5/5] sd_zbc: Avoid that resetting a zone fails sporadically

by Bart Van Assche

Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is called from sd_probe_async() and since sd_revalidate_disk() calls sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called concurrently with blkdev_report_zones() and/or blkdev_reset_zones(). That can cause these functions to fail with -EIO because sd_zbc_read_zones() e.g. sets q->nr_zones to zero before restoring it to the actual value, even if no drive characteristics have changed. Avoid that this can happen by making the following changes: - Protect the code that updates zone information with blk_queue_enter() and blk_queue_exit(). - Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that these functions do not modify struct scsi_disk before all zone information has been obtained. Note: since commit 055f6e18e08f ("block: Make q_usage_counter also track legacy requests"; kernel v4.15) the request queue freezing mechanism also affects legacy request queues. Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com> Cc: Jens Axboe <axboe(a)kernel.dk> Cc: Damien Le Moal <damien.lemoal(a)wdc.com> Cc: Christoph Hellwig <hch(a)lst.de> Cc: Hannes Reinecke <hare(a)suse.com> Cc: stable(a)vger.kernel.org --- drivers/scsi/sd_zbc.c | 140 +++++++++++++++++++++++++++++-------------------- include/linux/blkdev.h | 5 ++ 2 files changed, 87 insertions(+), 58 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index f290e8a9923d..88e1e80fa497 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -378,8 +378,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf) * * Check that all zones of the device are equal. The last zone can however * be smaller. The zone size must also be a power of two number of LBAs. + * + * Returns the zone size in bytes upon success or an error code upon failure. */ -static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) +static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp) { u64 zone_blocks = 0; sector_t block = 0; @@ -390,8 +392,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) int ret; u8 same; - sdkp->zone_blocks = 0; - /* Get a buffer */ buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); if (!buf) @@ -423,16 +423,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) /* Parse zone descriptors */ while (rec < buf + buf_len) { - zone_blocks = get_unaligned_be64(&rec[8]); - if (sdkp->zone_blocks == 0) { - sdkp->zone_blocks = zone_blocks; - } else if (zone_blocks != sdkp->zone_blocks && - (block + zone_blocks < sdkp->capacity - || zone_blocks > sdkp->zone_blocks)) { - zone_blocks = 0; + u64 this_zone_blocks = get_unaligned_be64(&rec[8]); + + if (zone_blocks == 0) { + zone_blocks = this_zone_blocks; + } else if (this_zone_blocks != zone_blocks && + (block + this_zone_blocks < sdkp->capacity + || this_zone_blocks > zone_blocks)) { + this_zone_blocks = 0; goto out; } - block += zone_blocks; + block += this_zone_blocks; rec += 64; } @@ -445,8 +446,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) } while (block < sdkp->capacity); - zone_blocks = sdkp->zone_blocks; - out: if (!zone_blocks) { if (sdkp->first_scan) @@ -466,8 +465,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) "Zone size too large\n"); ret = -ENODEV; } else { - sdkp->zone_blocks = zone_blocks; - sdkp->zone_shift = ilog2(zone_blocks); + ret = zone_blocks; } out_free: @@ -478,15 +476,14 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) /** * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone). - * @sdkp: The disk of the bitmap + * @nr_zones: Number of zones to allocate space for. + * @numa_node: NUMA node to allocate the memory from. */ -static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) +static inline unsigned long * +sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node) { - struct request_queue *q = sdkp->disk->queue; - - return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones) - * sizeof(unsigned long), - GFP_KERNEL, q->node); + return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long), + GFP_KERNEL, numa_node); } /** @@ -494,6 +491,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) * @sdkp: disk used * @buf: report reply buffer * @buflen: length of @buf + * @zone_shift: logarithm base 2 of the number of blocks in a zone * @seq_zones_bitmap: bitmap of sequential zones to set * * Parse reported zone descriptors in @buf to identify sequential zones and @@ -503,7 +501,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) * Return the LBA after the last zone reported. */ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, - unsigned int buflen, + unsigned int buflen, u32 zone_shift, unsigned long *seq_zones_bitmap) { sector_t lba, next_lba = sdkp->capacity; @@ -522,7 +520,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, if (type != ZBC_ZONE_TYPE_CONV && cond != ZBC_ZONE_COND_READONLY && cond != ZBC_ZONE_COND_OFFLINE) - set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap); + set_bit(lba >> zone_shift, seq_zones_bitmap); next_lba = lba + get_unaligned_be64(&rec[8]); rec += 64; } @@ -531,12 +529,16 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, } /** - * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap. + * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap. * @sdkp: target disk + * @zone_shift: logarithm base 2 of the number of blocks in a zone + * @nr_zones: number of zones to set up a seq zone bitmap for * * Allocate a zone bitmap and initialize it by identifying sequential zones. */ -static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) +static unsigned long * +sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift, + u32 nr_zones) { struct request_queue *q = sdkp->disk->queue; unsigned long *seq_zones_bitmap; @@ -544,9 +546,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) unsigned char *buf; int ret = -ENOMEM; - seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp); + seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node); if (!seq_zones_bitmap) - return -ENOMEM; + return ERR_PTR(-ENOMEM); buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); if (!buf) @@ -557,7 +559,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) if (ret) goto out; lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE, - seq_zones_bitmap); + zone_shift, seq_zones_bitmap); } if (lba != sdkp->capacity) { @@ -569,12 +571,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) kfree(buf); if (ret) { kfree(seq_zones_bitmap); - return ret; + return ERR_PTR(ret); } - - q->seq_zones_bitmap = seq_zones_bitmap; - - return 0; + return seq_zones_bitmap; } static void sd_zbc_cleanup(struct scsi_disk *sdkp) @@ -590,44 +589,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp) q->nr_zones = 0; } -static int sd_zbc_setup(struct scsi_disk *sdkp) +static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks) { struct request_queue *q = sdkp->disk->queue; + u32 zone_shift = ilog2(zone_blocks); + u32 nr_zones; int ret; - /* READ16/WRITE16 is mandatory for ZBC disks */ - sdkp->device->use_16_for_rw = 1; - sdkp->device->use_10_for_rw = 0; - /* chunk_sectors indicates the zone size */ - blk_queue_chunk_sectors(sdkp->disk->queue, - logical_to_sectors(sdkp->device, sdkp->zone_blocks)); - sdkp->nr_zones = - round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift; + blk_queue_chunk_sectors(q, + logical_to_sectors(sdkp->device, zone_blocks)); + nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift; /* * Initialize the device request queue information if the number * of zones changed. */ - if (sdkp->nr_zones != q->nr_zones) { - - sd_zbc_cleanup(sdkp); - - q->nr_zones = sdkp->nr_zones; - if (sdkp->nr_zones) { - q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp); - if (!q->seq_zones_wlock) { + if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) { + unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; + size_t zone_bitmap_size; + + if (nr_zones) { + seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones, + q->node); + if (!seq_zones_wlock) { ret = -ENOMEM; goto err; } - ret = sd_zbc_setup_seq_zones_bitmap(sdkp); - if (ret) { - sd_zbc_cleanup(sdkp); + seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp, + zone_shift, nr_zones); + if (IS_ERR(seq_zones_bitmap)) { + ret = PTR_ERR(seq_zones_bitmap); + kfree(seq_zones_wlock); goto err; } } - + zone_bitmap_size = BITS_TO_LONGS(nr_zones) * + sizeof(unsigned long); + blk_mq_freeze_queue(q); + if (q->nr_zones != nr_zones) { + /* READ16/WRITE16 is mandatory for ZBC disks */ + sdkp->device->use_16_for_rw = 1; + sdkp->device->use_10_for_rw = 0; + + sdkp->zone_blocks = zone_blocks; + sdkp->zone_shift = zone_shift; + sdkp->nr_zones = nr_zones; + q->nr_zones = nr_zones; + swap(q->seq_zones_wlock, seq_zones_wlock); + swap(q->seq_zones_bitmap, seq_zones_bitmap); + } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap, + zone_bitmap_size) != 0) { + memcpy(q->seq_zones_bitmap, seq_zones_bitmap, + zone_bitmap_size); + } + blk_mq_unfreeze_queue(q); + kfree(seq_zones_wlock); + kfree(seq_zones_bitmap); } return 0; @@ -639,6 +658,7 @@ static int sd_zbc_setup(struct scsi_disk *sdkp) int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) { + int64_t zone_blocks; int ret; if (!sd_is_zoned(sdkp)) @@ -675,12 +695,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) * Check zone size: only devices with a constant zone size (except * an eventual last runt zone) that is a power of 2 are supported. */ - ret = sd_zbc_check_zone_size(sdkp); - if (ret) + zone_blocks = sd_zbc_check_zone_size(sdkp); + ret = -EFBIG; + if (zone_blocks != (u32)zone_blocks) + goto err; + ret = zone_blocks; + if (ret < 0) goto err; /* The drive satisfies the kernel restrictions: set it up */ - ret = sd_zbc_setup(sdkp); + ret = sd_zbc_setup(sdkp, zone_blocks); if (ret) goto err; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9af3e0f430bc..21e21f273a21 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -605,6 +605,11 @@ struct request_queue { * initialized by the low level device driver (e.g. scsi/sd.c). * Stacking drivers (device mappers) may or may not initialize * these fields. + * + * Reads of this information must be protected with blk_queue_enter() / + * blk_queue_exit(). Modifying this information is only allowed while + * no requests are being processed. See also blk_mq_freeze_queue() and + * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; unsigned long *seq_zones_bitmap; -- 2.16.3

7 years, 8 months

1
0
0 0

[PATCH 4/5] block: Avoid executing a report or reset zones while a queue is frozen

by Bart Van Assche

This change will be used by the next patch to prevent that zoned block device ioctls are executed while the queue is frozen. Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com> Cc: Jens Axboe <axboe(a)kernel.dk> Cc: Damien Le Moal <damien.lemoal(a)wdc.com> Cc: Christoph Hellwig <hch(a)lst.de> Cc: Hannes Reinecke <hare(a)suse.com> Cc: stable(a)vger.kernel.org --- block/blk-zoned.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 08e84ef2bc05..249fcd747926 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -127,15 +127,19 @@ int blkdev_report_zones(struct block_device *bdev, if (!q) return -ENXIO; + blk_queue_enter(q, 0); + + ret = -EOPNOTSUPP; if (!blk_queue_is_zoned(q)) - return -EOPNOTSUPP; + goto exit_queue; + ret = 0; if (!nrz) - return 0; + goto exit_queue; if (sector > bdev->bd_part->nr_sects) { *nr_zones = 0; - return 0; + goto exit_queue; } /* @@ -154,9 +158,10 @@ int blkdev_report_zones(struct block_device *bdev, nr_pages = min_t(unsigned int, nr_pages, queue_max_segments(q)); + ret = -ENOMEM; bio = bio_alloc(gfp_mask, nr_pages); if (!bio) - return -ENOMEM; + goto exit_queue; bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = blk_zone_start(q, sector); @@ -166,7 +171,7 @@ int blkdev_report_zones(struct block_device *bdev, page = alloc_page(gfp_mask); if (!page) { ret = -ENOMEM; - goto out; + goto put_bio; } if (!bio_add_page(bio, page, PAGE_SIZE, 0)) { __free_page(page); @@ -179,7 +184,7 @@ int blkdev_report_zones(struct block_device *bdev, else ret = submit_bio_wait(bio); if (ret) - goto out; + goto put_bio; /* * Process the report result: skip the header and go through the @@ -222,11 +227,14 @@ int blkdev_report_zones(struct block_device *bdev, } *nr_zones = nz; -out: +put_bio: bio_for_each_segment_all(bv, bio, i) __free_page(bv->bv_page); bio_put(bio); +exit_queue: + blk_queue_exit(q); + return ret; } EXPORT_SYMBOL_GPL(blkdev_report_zones); @@ -256,21 +264,25 @@ int blkdev_reset_zones(struct block_device *bdev, if (!q) return -ENXIO; + blk_queue_enter(q, 0); + + ret = -EOPNOTSUPP; if (!blk_queue_is_zoned(q)) - return -EOPNOTSUPP; + goto out; + ret = -EINVAL; if (end_sector > bdev->bd_part->nr_sects) /* Out of range */ - return -EINVAL; + goto out; /* Check alignment (handle eventual smaller last zone) */ zone_sectors = blk_queue_zone_sectors(q); if (sector & (zone_sectors - 1)) - return -EINVAL; + goto out; if ((nr_sectors & (zone_sectors - 1)) && end_sector != bdev->bd_part->nr_sects) - return -EINVAL; + goto out; while (sector < end_sector) { @@ -283,7 +295,7 @@ int blkdev_reset_zones(struct block_device *bdev, bio_put(bio); if (ret) - return ret; + goto out; sector += zone_sectors; @@ -292,7 +304,11 @@ int blkdev_reset_zones(struct block_device *bdev, } - return 0; + ret = 0; + +out: + blk_queue_exit(q); + return ret; } EXPORT_SYMBOL_GPL(blkdev_reset_zones); -- 2.16.3

7 years, 8 months

1
0
0 0

[PATCH 3/5] sd_zbc: Log zone reset failures

by Bart Van Assche

A zone reset only fails for conventional zones. Resetting a conventional zone indicates a bug in the software that submitted the zone reset so the information about a zone reset failure is useful information. Hence, do not suppress information about zone reset failures. Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com> Cc: Damien Le Moal <damien.lemoal(a)wdc.com> Cc: Christoph Hellwig <hch(a)lst.de> Cc: Hannes Reinecke <hare(a)suse.com> Cc: stable(a)vger.kernel.org --- drivers/scsi/sd_zbc.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 2d0c06f7db3e..f290e8a9923d 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -284,18 +284,6 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, switch (req_op(rq)) { case REQ_OP_ZONE_RESET: - - if (result && - sshdr->sense_key == ILLEGAL_REQUEST && - sshdr->asc == 0x24) - /* - * INVALID FIELD IN CDB error: reset of a conventional - * zone was attempted. Nothing to worry about, so be - * quiet about the error. - */ - rq->rq_flags |= RQF_QUIET; - break; - case REQ_OP_WRITE: case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE_SAME: -- 2.16.3

7 years, 8 months

1
0
0 0

[PATCH 2/5] sd_zbc: Let the SCSI core handle ILLEGAL REQUEST / ASC 0x21

by Bart Van Assche

scsi_io_completion() translates the sense key ILLEGAL REQUEST / ASC 0x21 into ACTION_FAIL. That means that setting cmd->allowed to zero in sd_zbc_complete() for this sense code / ASC combination is not necessary. Hence remove the code that resets cmd->allowed from sd_zbc_complete(). Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com> Cc: Damien Le Moal <damien.lemoal(a)wdc.com> Cc: Christoph Hellwig <hch(a)lst.de> Cc: Hannes Reinecke <hare(a)suse.com> Cc: stable(a)vger.kernel.org --- drivers/scsi/sd_zbc.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 41df75eea57b..2d0c06f7db3e 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -299,16 +299,6 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, case REQ_OP_WRITE: case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE_SAME: - - if (result && - sshdr->sense_key == ILLEGAL_REQUEST && - sshdr->asc == 0x21) - /* - * INVALID ADDRESS FOR WRITE error: It is unlikely that - * retrying write requests failed with any kind of - * alignement error will result in success. So don't. - */ - cmd->allowed = 0; break; case REQ_OP_ZONE_REPORT: -- 2.16.3

7 years, 8 months

1
0
0 0

[PATCH 1/5] sd_zbc: Change the type of the ZBC fields into u32

by Bart Van Assche

This patch does not change any functionality but makes it clear that it is on purpose that these fields are 32 bits wide. Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com> Cc: Damien Le Moal <damien.lemoal(a)wdc.com> Cc: Christoph Hellwig <hch(a)lst.de> Cc: Hannes Reinecke <hare(a)suse.com> Cc: stable(a)vger.kernel.org --- drivers/scsi/sd.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 0d663b5e45bb..392c7d078ae3 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -74,12 +74,12 @@ struct scsi_disk { struct gendisk *disk; struct opal_dev *opal_dev; #ifdef CONFIG_BLK_DEV_ZONED - unsigned int nr_zones; - unsigned int zone_blocks; - unsigned int zone_shift; - unsigned int zones_optimal_open; - unsigned int zones_optimal_nonseq; - unsigned int zones_max_open; + u32 nr_zones; + u32 zone_blocks; + u32 zone_shift; + u32 zones_optimal_open; + u32 zones_optimal_nonseq; + u32 zones_max_open; #endif atomic_t openers; sector_t capacity; /* size in logical blocks */ -- 2.16.3

7 years, 8 months

1
0
0 0

[PATCH 24/24] x86/spectre_v2: Don't check microcode versions when running under hypervisors

by Youquan Song

From: Konrad Rzeszutek Wilk <konrad.wilk(a)oracle.com> (cherry picked from commit 36268223c1e9981d6cfc33aff8520b3bde4b8114) As: 1) It's known that hypervisors lie about the environment anyhow (host mismatch) 2) Even if the hypervisor (Xen, KVM, VMWare, etc) provided a valid "correct" value, it all gets to be very murky when migration happens (do you provide the "new" microcode of the machine?). And in reality the cloud vendors are the ones that should make sure that the microcode that is running is correct and we should just sing lalalala and trust them. Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk(a)oracle.com> Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de> Reviewed-by: Paolo Bonzini <pbonzini(a)redhat.com> Cc: Wanpeng Li <kernellwp(a)gmail.com> Cc: kvm <kvm(a)vger.kernel.org> Cc: Krčmář <rkrcmar(a)redhat.com> Cc: Borislav Petkov <bp(a)alien8.de> CC: "H. Peter Anvin" <hpa(a)zytor.com> CC: stable(a)vger.kernel.org Link: https://lkml.kernel.org/r/20180226213019.GE9497@char.us.oracle.com Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> [Yi Sun: port to 4.4] Signed-off-by: Yi Sun <yi.y.sun(a)linux.intel.com> --- arch/x86/kernel/cpu/intel.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index af28610..221c030 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -71,6 +71,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) { int i; + /* + * We know that the hypervisor lie to us on the microcode version so + * we may as well hope that it is running the correct version. + */ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return false; + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && c->x86_mask == spectre_bad_microcodes[i].stepping) -- 1.9.1

7 years, 8 months

1
0
0 0

[PATCH 23/23] KVM/VMX: Optimize vmx_vcpu_run() and svm_vcpu_run() by marking the RDMSR path as unlikely()

by Youquan Song

From: Paolo Bonzini <pbonzini(a)redhat.com> commit 946fbbc13dce68902f64515b610eeb2a6c3d7a64 upstream. vmx_vcpu_run() and svm_vcpu_run() are large functions, and giving branch hints to the compiler can actually make a substantial cycle difference by keeping the fast path contiguous in memory. With this optimization, the retpoline-guest/retpoline-host case is about 50 cycles faster. Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com> Reviewed-by: Jim Mattson <jmattson(a)google.com> Cc: David Woodhouse <dwmw(a)amazon.co.uk> Cc: KarimAllah Ahmed <karahmed(a)amazon.de> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Radim Krčmář <rkrcmar(a)redhat.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: kvm(a)vger.kernel.org Cc: stable(a)vger.kernel.org Link: http://lkml.kernel.org/r/20180222154318.20361-3-pbonzini@redhat.com Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> [Yi Sun: cherry pick to 4.4] Signed-off-by: Yi Sun <yi.y.sun(a)linux.intel.com> --- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index cb46661..35da1e2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4017,7 +4017,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (svm->spec_ctrl) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6c109df..1f165cc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8743,7 +8743,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (vmx->spec_ctrl) -- 1.9.1

7 years, 8 months

1
0
0 0

[PATCH 22/23] KVM/x86: Remove indirect MSR op calls from SPEC_CTRL

by Youquan Song

From: Paolo Bonzini <pbonzini(a)redhat.com> commit ecb586bd29c99fb4de599dec388658e74388daad upstream. Having a paravirt indirect call in the IBRS restore path is not a good idea, since we are trying to protect from speculative execution of bogus indirect branch targets. It is also slower, so use native_wrmsrl() on the vmentry path too. Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com> Reviewed-by: Jim Mattson <jmattson(a)google.com> Cc: David Woodhouse <dwmw(a)amazon.co.uk> Cc: KarimAllah Ahmed <karahmed(a)amazon.de> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Peter Zijlstra <peterz(a)infradead.org> Cc: Radim Krčmář <rkrcmar(a)redhat.com> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: kvm(a)vger.kernel.org Cc: stable(a)vger.kernel.org Fixes: d28b387fb74da95d69d2615732f50cceb38e9a4d Link: http://lkml.kernel.org/r/20180222154318.20361-2-pbonzini@redhat.com Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> [Yi Sun: port to 4.4] Signed-off-by: Yi Sun <yi.y.sun(a)linux.intel.com> Conflicts: arch/x86/kvm/svm.c --- arch/x86/kvm/svm.c | 7 ++++--- arch/x86/kvm/vmx.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 08d42a9..cb46661 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -37,6 +37,7 @@ #include <asm/desc.h> #include <asm/debugreg.h> #include <asm/kvm_para.h> +#include <asm/microcode.h> #include <asm/nospec-branch.h> #include <asm/virtext.h> @@ -3907,7 +3908,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -4017,10 +4018,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * save it. */ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 618174b..6c109df 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -48,6 +48,7 @@ #include <asm/kexec.h> #include <asm/apic.h> #include <asm/irq_remapping.h> +#include <asm/microcode.h> #include <asm/nospec-branch.h> #include "trace.h" @@ -8607,7 +8608,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (vmx->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); vmx->__launched = vmx->loaded_vmcs->launched; asm( @@ -8743,10 +8744,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * save it. */ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (vmx->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); -- 1.9.1

7 years, 8 months

1
0
0 0

[PATCH 01/24] x86/cpufeatures: Add CPUID_7_EDX CPUID leaf

by Youquan Song

From: David Woodhouse <dwmw(a)amazon.co.uk> (cherry picked from commit 95ca0ee8636059ea2800dfbac9ecac6212d6b38f) This is a pure feature bits leaf. There are two AVX512 feature bits in it already which were handled as scattered bits, and three more from this leaf are going to be added for speculation control features. Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk> Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de> Reviewed-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Reviewed-by: Borislav Petkov <bp(a)suse.de> Cc: gnomes(a)lxorguk.ukuu.org.uk Cc: ak(a)linux.intel.com Cc: ashok.raj(a)intel.com Cc: dave.hansen(a)intel.com Cc: karahmed(a)amazon.de Cc: arjan(a)linux.intel.com Cc: torvalds(a)linux-foundation.org Cc: peterz(a)infradead.org Cc: bp(a)alien8.de Cc: pbonzini(a)redhat.com Cc: tim.c.chen(a)linux.intel.com Cc: gregkh(a)linux-foundation.org Link: https://lkml.kernel.org/r/1516896855-7642-2-git-send-email-dwmw@amazon.co.uk Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk> Signed-off-by: Youquan Song <youquan.song(a)linux.intel.com> [v4.4 backport] --- arch/x86/include/asm/cpufeature.h | 2 +- arch/x86/kernel/cpu/common.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 641f0f2..3d2556c 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -12,7 +12,7 @@ #include <asm/disabled-features.h> #endif -#define NCAPINTS 14 /* N 32-bit words worth of info */ +#define NCAPINTS 15 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8eabbaf..a97ed10 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -695,6 +695,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); c->x86_capability[9] = ebx; + c->x86_capability[14] = edx; } /* Extended state features: level 0x0000000d */ -- 1.8.3.1

7 years, 8 months

2
24
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror