April 2018 - Linux-stable-mirror

[PATCH resend 3/4] virt: vbox: Use __get_free_pages instead of kmalloc for DMA32 memory

by Hans de Goede

It is not possible to get DMA32 zone memory through kmalloc, causing the vboxguest driver to malfunction due to getting memory above 4G which the PCI device cannot handle. This commit changes the kmalloc calls where the 4G limit matters to using __get_free_pages() fixing vboxguest not working on x86_64 guests with more then 4G RAM. Cc: stable(a)vger.kernel.org Reported-by: Eloy Coto Pereiro <eloy.coto(a)gmail.com> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com> --- drivers/virt/vboxguest/vboxguest_linux.c | 19 ++++++++++++++++--- drivers/virt/vboxguest/vboxguest_utils.c | 5 +++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/virt/vboxguest/vboxguest_linux.c b/drivers/virt/vboxguest/vboxguest_linux.c index 82e280d38cc2..398d22693234 100644 --- a/drivers/virt/vboxguest/vboxguest_linux.c +++ b/drivers/virt/vboxguest/vboxguest_linux.c @@ -87,6 +87,7 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, struct vbg_session *session = filp->private_data; size_t returned_size, size; struct vbg_ioctl_hdr hdr; + bool is_vmmdev_req; int ret = 0; void *buf; @@ -106,8 +107,17 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, if (size > SZ_16M) return -E2BIG; - /* __GFP_DMA32 because IOCTL_VMMDEV_REQUEST passes this to the host */ - buf = kmalloc(size, GFP_KERNEL | __GFP_DMA32); + /* + * IOCTL_VMMDEV_REQUEST needs the buffer to be below 4G to avoid + * the need for a bounce-buffer and another copy later on. + */ + is_vmmdev_req = (req & ~IOCSIZE_MASK) == VBG_IOCTL_VMMDEV_REQUEST(0) || + req == VBG_IOCTL_VMMDEV_REQUEST_BIG; + + if (is_vmmdev_req) + buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT); + else + buf = kmalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -132,7 +142,10 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, ret = -EFAULT; out: - kfree(buf); + if (is_vmmdev_req) + vbg_req_free(buf, size); + else + kfree(buf); return ret; } diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index bad915463359..bf4474214b4d 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -65,8 +65,9 @@ VBG_LOG(vbg_debug, pr_debug); void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type) { struct vmmdev_request_header *req; + int order = get_order(PAGE_ALIGN(len)); - req = kmalloc(len, GFP_KERNEL | __GFP_DMA32); + req = (void *)__get_free_pages(GFP_KERNEL | GFP_DMA32, order); if (!req) return NULL; @@ -87,7 +88,7 @@ void vbg_req_free(void *req, size_t len) if (!req) return; - kfree(req); + free_pages((unsigned long)req, get_order(PAGE_ALIGN(len))); } /* Note this function returns a VBox status code, not a negative errno!! */ -- 2.17.0

7 years, 2 months

1
0
0 0

[PATCH resend 2/4] virt: vbox: Add vbg_req_free() helper function

by Hans de Goede

This is a preparation patch for fixing issues on x86_64 virtual-machines with more then 4G of RAM, atm we pass __GFP_DMA32 to kmalloc, but kmalloc does not honor that, so we need to switch to get_pages, which means we will not be able to use kfree to free memory allocated with vbg_alloc_req. While at it also remove a comment on a vbg_alloc_req call which talks about Windows (inherited from the vbox upstream cross-platform code). Cc: stable(a)vger.kernel.org Signed-off-by: Hans de Goede <hdegoede(a)redhat.com> --- Changes in v2: -Put the vbg_req_free() prototype decl. in the private vboxguest_core.h instead of in linux/vbox_utils.h as it is private --- drivers/virt/vboxguest/vboxguest_core.c | 66 +++++++++++++----------- drivers/virt/vboxguest/vboxguest_core.h | 1 + drivers/virt/vboxguest/vboxguest_utils.c | 14 +++-- 3 files changed, 47 insertions(+), 34 deletions(-) diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c index 190dbf8cfcb5..7411a535fda2 100644 --- a/drivers/virt/vboxguest/vboxguest_core.c +++ b/drivers/virt/vboxguest/vboxguest_core.c @@ -114,7 +114,7 @@ static void vbg_guest_mappings_init(struct vbg_dev *gdev) } out: - kfree(req); + vbg_req_free(req, sizeof(*req)); kfree(pages); } @@ -144,7 +144,7 @@ static void vbg_guest_mappings_exit(struct vbg_dev *gdev) rc = vbg_req_perform(gdev, req); - kfree(req); + vbg_req_free(req, sizeof(*req)); if (rc < 0) { vbg_err("%s error: %d\n", __func__, rc); @@ -214,8 +214,8 @@ static int vbg_report_guest_info(struct vbg_dev *gdev) ret = vbg_status_code_to_errno(rc); out_free: - kfree(req2); - kfree(req1); + vbg_req_free(req2, sizeof(*req2)); + vbg_req_free(req1, sizeof(*req1)); return ret; } @@ -245,7 +245,7 @@ static int vbg_report_driver_status(struct vbg_dev *gdev, bool active) if (rc == VERR_NOT_IMPLEMENTED) /* Compatibility with older hosts. */ rc = VINF_SUCCESS; - kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); } @@ -431,7 +431,7 @@ static int vbg_heartbeat_host_config(struct vbg_dev *gdev, bool enabled) rc = vbg_req_perform(gdev, req); do_div(req->interval_ns, 1000000); /* ns -> ms */ gdev->heartbeat_interval_ms = req->interval_ns; - kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); } @@ -454,12 +454,6 @@ static int vbg_heartbeat_init(struct vbg_dev *gdev) if (ret < 0) return ret; - /* - * Preallocate the request to use it from the timer callback because: - * 1) on Windows vbg_req_alloc must be called at IRQL <= APC_LEVEL - * and the timer callback runs at DISPATCH_LEVEL; - * 2) avoid repeated allocations. - */ gdev->guest_heartbeat_req = vbg_req_alloc( sizeof(*gdev->guest_heartbeat_req), VMMDEVREQ_GUEST_HEARTBEAT); @@ -481,8 +475,8 @@ static void vbg_heartbeat_exit(struct vbg_dev *gdev) { del_timer_sync(&gdev->heartbeat_timer); vbg_heartbeat_host_config(gdev, false); - kfree(gdev->guest_heartbeat_req); - + vbg_req_free(gdev->guest_heartbeat_req, + sizeof(*gdev->guest_heartbeat_req)); } /** @@ -543,7 +537,7 @@ static int vbg_reset_host_event_filter(struct vbg_dev *gdev, if (rc < 0) vbg_err("%s error, rc: %d\n", __func__, rc); - kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); } @@ -617,7 +611,7 @@ static int vbg_set_session_event_filter(struct vbg_dev *gdev, out: mutex_unlock(&gdev->session_mutex); - kfree(req); + vbg_req_free(req, sizeof(*req)); return ret; } @@ -642,7 +636,7 @@ static int vbg_reset_host_capabilities(struct vbg_dev *gdev) if (rc < 0) vbg_err("%s error, rc: %d\n", __func__, rc); - kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); } @@ -712,7 +706,7 @@ static int vbg_set_session_capabilities(struct vbg_dev *gdev, out: mutex_unlock(&gdev->session_mutex); - kfree(req); + vbg_req_free(req, sizeof(*req)); return ret; } @@ -749,7 +743,7 @@ static int vbg_query_host_version(struct vbg_dev *gdev) } out: - kfree(req); + vbg_req_free(req, sizeof(*req)); return ret; } @@ -847,11 +841,16 @@ int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events) return 0; err_free_reqs: - kfree(gdev->mouse_status_req); - kfree(gdev->ack_events_req); - kfree(gdev->cancel_req); - kfree(gdev->mem_balloon.change_req); - kfree(gdev->mem_balloon.get_req); + vbg_req_free(gdev->mouse_status_req, + sizeof(*gdev->mouse_status_req)); + vbg_req_free(gdev->ack_events_req, + sizeof(*gdev->ack_events_req)); + vbg_req_free(gdev->cancel_req, + sizeof(*gdev->cancel_req)); + vbg_req_free(gdev->mem_balloon.change_req, + sizeof(*gdev->mem_balloon.change_req)); + vbg_req_free(gdev->mem_balloon.get_req, + sizeof(*gdev->mem_balloon.get_req)); return ret; } @@ -872,11 +871,16 @@ void vbg_core_exit(struct vbg_dev *gdev) vbg_reset_host_capabilities(gdev); vbg_core_set_mouse_status(gdev, 0); - kfree(gdev->mouse_status_req); - kfree(gdev->ack_events_req); - kfree(gdev->cancel_req); - kfree(gdev->mem_balloon.change_req); - kfree(gdev->mem_balloon.get_req); + vbg_req_free(gdev->mouse_status_req, + sizeof(*gdev->mouse_status_req)); + vbg_req_free(gdev->ack_events_req, + sizeof(*gdev->ack_events_req)); + vbg_req_free(gdev->cancel_req, + sizeof(*gdev->cancel_req)); + vbg_req_free(gdev->mem_balloon.change_req, + sizeof(*gdev->mem_balloon.change_req)); + vbg_req_free(gdev->mem_balloon.get_req, + sizeof(*gdev->mem_balloon.get_req)); } /** @@ -1415,7 +1419,7 @@ static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev, req->flags = dump->u.in.flags; dump->hdr.rc = vbg_req_perform(gdev, req); - kfree(req); + vbg_req_free(req, sizeof(*req)); return 0; } @@ -1513,7 +1517,7 @@ int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features) if (rc < 0) vbg_err("%s error, rc: %d\n", __func__, rc); - kfree(req); + vbg_req_free(req, sizeof(*req)); return vbg_status_code_to_errno(rc); } diff --git a/drivers/virt/vboxguest/vboxguest_core.h b/drivers/virt/vboxguest/vboxguest_core.h index 39ed85cf9244..7ad9ec45bfa9 100644 --- a/drivers/virt/vboxguest/vboxguest_core.h +++ b/drivers/virt/vboxguest/vboxguest_core.h @@ -173,6 +173,7 @@ void vbg_linux_mouse_event(struct vbg_dev *gdev); /* Private (non exported) functions form vboxguest_utils.c */ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type); +void vbg_req_free(void *req, size_t len); int vbg_req_perform(struct vbg_dev *gdev, void *req); int vbg_hgcm_call32( struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index 0f0dab8023cf..bad915463359 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -82,6 +82,14 @@ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type) return req; } +void vbg_req_free(void *req, size_t len) +{ + if (!req) + return; + + kfree(req); +} + /* Note this function returns a VBox status code, not a negative errno!! */ int vbg_req_perform(struct vbg_dev *gdev, void *req) { @@ -137,7 +145,7 @@ int vbg_hgcm_connect(struct vbg_dev *gdev, rc = hgcm_connect->header.result; } - kfree(hgcm_connect); + vbg_req_free(hgcm_connect, sizeof(*hgcm_connect)); *vbox_status = rc; return 0; @@ -166,7 +174,7 @@ int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status) if (rc >= 0) rc = hgcm_disconnect->header.result; - kfree(hgcm_disconnect); + vbg_req_free(hgcm_disconnect, sizeof(*hgcm_disconnect)); *vbox_status = rc; return 0; @@ -623,7 +631,7 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, } if (!leak_it) - kfree(call); + vbg_req_free(call, size); free_bounce_bufs: if (bounce_bufs) { -- 2.17.0

7 years, 2 months

1
0
0 0

[PATCH resend 1/4] virt: vbox: Move declarations of vboxguest private functions to private header

by Hans de Goede

Move the declarations of functions from vboxguest_utils.c which are only meant for vboxguest internal use from include/linux/vbox_utils.h to drivers/virt/vboxguest/vboxguest_core.h. Cc: stable(a)vger.kernel.org Signed-off-by: Hans de Goede <hdegoede(a)redhat.com> --- drivers/virt/vboxguest/vboxguest_core.h | 8 ++++++++ include/linux/vbox_utils.h | 23 ----------------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/drivers/virt/vboxguest/vboxguest_core.h b/drivers/virt/vboxguest/vboxguest_core.h index 6c784bf4fa6d..39ed85cf9244 100644 --- a/drivers/virt/vboxguest/vboxguest_core.h +++ b/drivers/virt/vboxguest/vboxguest_core.h @@ -171,4 +171,12 @@ irqreturn_t vbg_core_isr(int irq, void *dev_id); void vbg_linux_mouse_event(struct vbg_dev *gdev); +/* Private (non exported) functions form vboxguest_utils.c */ +void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type); +int vbg_req_perform(struct vbg_dev *gdev, void *req); +int vbg_hgcm_call32( + struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, + struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count, + int *vbox_status); + #endif diff --git a/include/linux/vbox_utils.h b/include/linux/vbox_utils.h index c71def6b310f..a240ed2a0372 100644 --- a/include/linux/vbox_utils.h +++ b/include/linux/vbox_utils.h @@ -24,24 +24,6 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...); #define vbg_debug pr_debug #endif -/** - * Allocate memory for generic request and initialize the request header. - * - * Return: the allocated memory - * @len: Size of memory block required for the request. - * @req_type: The generic request type. - */ -void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type); - -/** - * Perform a generic request. - * - * Return: VBox status code - * @gdev: The Guest extension device. - * @req: Pointer to the request structure. - */ -int vbg_req_perform(struct vbg_dev *gdev, void *req); - int vbg_hgcm_connect(struct vbg_dev *gdev, struct vmmdev_hgcm_service_location *loc, u32 *client_id, int *vbox_status); @@ -52,11 +34,6 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms, u32 parm_count, int *vbox_status); -int vbg_hgcm_call32( - struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, - struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count, - int *vbox_status); - /** * Convert a VirtualBox status code to a standard Linux kernel return value. * Return: 0 or negative errno value. -- 2.17.0

7 years, 2 months

1
0
0 0

[PATCH v2 00/10] SWIM driver fixes

by Finn Thain

This patch series has fixes for bugs in the SWIM floppy disk controller driver, including an oops and a soft lockup. One way to apply these patches to v4.14+ is by first cherry-picking these commits: b87eaec27eca3def6c8ed617e3b1bac08d7bc715 e5f0d2e2a153b18dcf31e1a633e210c37829d759 There are of course other ways to fix the patch rejects, but this way would be convenient for me because it would simplify my own backporting. Changes since v1: - Dropped the two IOP patches as they aren't simple fixes. This way, the entire series is suitable for stable trees. - Added Cc, Fixes, Acked-by and Reviewed-by tags. Finn Thain (10): m68k/mac: Revisit floppy disc controller base addresses m68k/mac: Fix SWIM memory resource end address m68k/mac: Don't remap SWIM MMIO region block/swim: Fix array bounds check block/swim: Remove extra put_disk() call from error path block/swim: Don't log an error message for an invalid ioctl block/swim: Rename macros to avoid inconsistent inverted logic block/swim: Check drive type block/swim: Fix IO error at end of medium block/swim: Select appropriate drive on device open arch/m68k/include/asm/macintosh.h | 10 +-- arch/m68k/mac/config.c | 126 ++++++++++++++++++++------------------ drivers/block/swim.c | 49 +++++++-------- drivers/block/swim3.c | 6 +- 4 files changed, 96 insertions(+), 95 deletions(-) -- 2.16.1

7 years, 2 months

3
14
0 0

[PATCH] Bluetooth: hci_qca: Avoid missing rampatch failure with userspace fw loader

by Amit Pundir

AOSP use userspace firmware loader to load firmwares, which will return -EAGAIN in case qca/rampatch_00440302.bin is not found. Since there is no rampatch for dragonboard820c QCA controller revision, just make it work as is. CC: Loic Poulain <loic.poulain(a)linaro.org> CC: Nicolas Dechesne <nicolas.dechesne(a)linaro.org> CC: Marcel Holtmann <marcel(a)holtmann.org> CC: Johan Hedberg <johan.hedberg(a)gmail.com> CC: Stable <stable(a)vger.kernel.org> Signed-off-by: Amit Pundir <amit.pundir(a)linaro.org> --- drivers/bluetooth/hci_qca.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 05ec530b8a3a..330e9b29e145 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -935,6 +935,12 @@ static int qca_setup(struct hci_uart *hu) } else if (ret == -ENOENT) { /* No patch/nvm-config found, run with original fw/config */ ret = 0; + } else if (ret == -EAGAIN) { + /* + * Userspace firmware loader will return -EAGAIN in case no + * patch/nvm-config is found, so run with original fw/config. + */ + ret = 0; } /* Setup bdaddr */ -- 2.7.4

7 years, 2 months

2
1
0 0

[PATCH v2] module: Fix display of wrong module .text address

by Thomas Richter

Fixes: ef0010a30935 ("vsprintf: don't use 'restricted_pointer()' when not restricting") for /sys/module/*/sections/.text file. Reading file /proc/modules shows the correct address: [root@s35lp76 ~]# cat /proc/modules | egrep '^qeth_l2' qeth_l2 94208 1 - Live 0x000003ff80401000 and reading file /sys/module/qeth_l2/sections/.text [root@s35lp76 ~]# cat /sys/module/qeth_l2/sections/.text 0x0000000018ea8363 displays a random address. This breaks the perf tool which uses this address on s390 to calculate start of .text section in memory. Fix this by printing the correct (unhashed) address. Thanks to Jessica Yu for helping on this. Suggested-by: Linus Torvalds <torvalds(a)linux-foundation.org> Signed-off-by: Thomas Richter <tmricht(a)linux.ibm.com> Cc: Jessica Yu <jeyu(a)kernel.org> Cc: stable(a)vger.kernel.org --- kernel/module.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/module.c b/kernel/module.c index a6e43a5806a1..40b42000bd80 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1472,7 +1472,8 @@ static ssize_t module_sect_show(struct module_attribute *mattr, { struct module_sect_attr *sattr = container_of(mattr, struct module_sect_attr, mattr); - return sprintf(buf, "0x%pK\n", (void *)sattr->address); + return sprintf(buf, "0x%px\n", kptr_restrict < 2 ? + (void *)sattr->address : 0); } static void free_sect_attrs(struct module_sect_attrs *sect_attrs) -- 2.14.3

7 years, 2 months

1
0
0 0

[PATCH for-next 00/13] IB/hfi1,core: Driver updates for 4.17

by Dennis Dalessandro

Hi Jason and Doug, Here are most of our updates for 4.17. I will follow this up with a small 16B series then I still have a few more patches that are waiting on some more thorough testing. Should be able to get them on the list tomrrow or Friday at the latest, wanted to get these out now. I don't think anything is really that scary in here. These are mostly driver fixes. Patches 4,7,8,11, and 14 are marked stable. They didn't get sent for the -rc because they fix really old issues. Patch 5 is a core fix. I should have sent it a bit sooner, sorry about that but it's pretty trivial so I decided to include it as well rather than wait for 4.18. --- Alex Estrin (2): IB/hfi1: Complete check for locally terminated smp IB/{hfi1,qib}: Add handling of kernel restart Ashutosh Dixit (1): IB/core: Fix rkey invalidation from user space into the kernel Michael J. Ruhl (5): IB/hfi1: Return actual error value from program_rcvarray() IB/hfi1: Use after free race condition in send context error path IB/hfi1 Use correct type for num_user_context IB/hfi1: Return correct value for device state IB/hfi1: Reorder incorrect send context disable Mike Marciniszyn (3): IB/hfi1: Fix handling of FECN marked multicast packet IB/hfi1: Fix fault injection init/exit issues IB/hfi1: Fix loss of BECN with AHG Sebastian Sanchez (2): IB/hfi1: Prevent LNI hang when LCB can't obtain lanes IB/{hfi1,rdmavt,qib}: Fit kernel completions into single aligned cache-line drivers/infiniband/core/uverbs_cmd.c | 4 + drivers/infiniband/hw/hfi1/chip.c | 59 ++++++++--- drivers/infiniband/hw/hfi1/chip.h | 15 ++- drivers/infiniband/hw/hfi1/chip_registers.h | 7 + drivers/infiniband/hw/hfi1/debugfs.c | 8 + drivers/infiniband/hw/hfi1/driver.c | 19 +++- drivers/infiniband/hw/hfi1/file_ops.c | 2 drivers/infiniband/hw/hfi1/hfi.h | 9 +- drivers/infiniband/hw/hfi1/init.c | 9 +- drivers/infiniband/hw/hfi1/mad.c | 36 ++++--- drivers/infiniband/hw/hfi1/pio.c | 44 ++++++-- drivers/infiniband/hw/hfi1/rc.c | 2 drivers/infiniband/hw/hfi1/ruc.c | 54 ++++++++-- drivers/infiniband/hw/hfi1/uc.c | 2 drivers/infiniband/hw/hfi1/ud.c | 10 +- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 1 drivers/infiniband/hw/qib/qib.h | 1 drivers/infiniband/hw/qib/qib_init.c | 5 + drivers/infiniband/hw/qib/qib_rc.c | 2 drivers/infiniband/hw/qib/qib_ruc.c | 4 - drivers/infiniband/hw/qib/qib_uc.c | 2 drivers/infiniband/hw/qib/qib_ud.c | 4 - drivers/infiniband/sw/rdmavt/cq.c | 146 ++++++++++++++++++--------- drivers/infiniband/sw/rdmavt/qp.c | 4 - drivers/infiniband/sw/rdmavt/trace_cq.h | 6 + include/rdma/ib_verbs.h | 5 + include/rdma/rdmavt_cq.h | 35 +++++- include/rdma/rdmavt_qp.h | 2 28 files changed, 344 insertions(+), 153 deletions(-) -- -Denny

7 years, 2 months

4
14
0 0

+ mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap.patch added to -mm tree

by akpm＠linux-foundation.org

The patch titled Subject: mm, oom: fix concurrent munlock and oom reaper unmap has been added to the -mm tree. Its filename is mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/mm-oom-fix-concurrent-munlock-and-… and later at http://ozlabs.org/~akpm/mmotm/broken-out/mm-oom-fix-concurrent-munlock-and-… Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: David Rientjes <rientjes(a)google.com> Subject: mm, oom: fix concurrent munlock and oom reaper unmap Since exit_mmap() is done without the protection of mm->mmap_sem, it is possible for the oom reaper to concurrently operate on an mm until MMF_OOM_SKIP is set. This allows munlock_vma_pages_all() to concurrently run while the oom reaper is operating on a vma. Since munlock_vma_pages_range() depends on clearing VM_LOCKED from vm_flags before actually doing the munlock to determine if any other vmas are locking the same memory, the check for VM_LOCKED in the oom reaper is racy. This is especially noticeable on architectures such as powerpc where clearing a huge pmd requires kick_all_cpus_sync(). If the pmd is zapped by the oom reaper during follow_page_mask() after the check for pmd_none() is bypassed, this ends up deferencing a NULL ptl. Fix this by reusing MMF_UNSTABLE to specify that an mm should not be reaped. This prevents the concurrent munlock_vma_pages_range() and unmap_page_range(). The oom reaper will simply not operate on an mm that has the bit set and leave the unmapping to exit_mmap(). Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1804171545460.53786@chino.kir.corp… Fixes: 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently") Signed-off-by: David Rientjes <rientjes(a)google.com> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Andrea Arcangeli <aarcange(a)redhat.com> Cc: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp> Cc: Roman Gushchin <guro(a)fb.com> Cc: <stable(a)vger.kernel.org> [4.14+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- mm/mmap.c | 38 ++++++++++++++++++++------------------ mm/oom_kill.c | 19 ++++++++----------- 2 files changed, 28 insertions(+), 29 deletions(-) diff -puN mm/mmap.c~mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap mm/mmap.c --- a/mm/mmap.c~mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap +++ a/mm/mmap.c @@ -3015,6 +3015,25 @@ void exit_mmap(struct mm_struct *mm) /* mm's last user has gone, and its about to be pulled down */ mmu_notifier_release(mm); + if (unlikely(mm_is_oom_victim(mm))) { + /* + * Wait for oom_reap_task() to stop working on this mm. Because + * MMF_UNSTABLE is already set before calling down_read(), + * oom_reap_task() will not run on this mm after up_write(). + * oom_reap_task() also depends on a stable VM_LOCKED flag to + * indicate it should not unmap during munlock_vma_pages_all(). + * + * mm_is_oom_victim() cannot be set from under us because + * victim->mm is already set to NULL under task_lock before + * calling mmput() and victim->signal->oom_mm is set by the oom + * killer only if victim->mm is non-NULL while holding + * task_lock(). + */ + set_bit(MMF_UNSTABLE, &mm->flags); + down_write(&mm->mmap_sem); + up_write(&mm->mmap_sem); + } + if (mm->locked_vm) { vma = mm->mmap; while (vma) { @@ -3036,26 +3055,9 @@ void exit_mmap(struct mm_struct *mm) /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); - - if (unlikely(mm_is_oom_victim(mm))) { - /* - * Wait for oom_reap_task() to stop working on this - * mm. Because MMF_OOM_SKIP is already set before - * calling down_read(), oom_reap_task() will not run - * on this "mm" post up_write(). - * - * mm_is_oom_victim() cannot be set from under us - * either because victim->mm is already set to NULL - * under task_lock before calling mmput and oom_mm is - * set not NULL by the OOM killer only if victim->mm - * is found not NULL while holding the task_lock. - */ - set_bit(MMF_OOM_SKIP, &mm->flags); - down_write(&mm->mmap_sem); - up_write(&mm->mmap_sem); - } free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, 0, -1); + set_bit(MMF_OOM_SKIP, &mm->flags); /* * Walk the list again, actually closing and freeing it, diff -puN mm/oom_kill.c~mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap mm/oom_kill.c --- a/mm/oom_kill.c~mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap +++ a/mm/oom_kill.c @@ -521,12 +521,17 @@ static bool __oom_reap_task_mm(struct ta } /* - * MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't - * work on the mm anymore. The check for MMF_OOM_SKIP must run + * Tell all users of get_user/copy_from_user etc... that the content + * is no longer stable. No barriers really needed because unmapping + * should imply barriers already and the reader would hit a page fault + * if it stumbled over reaped memory. + * + * MMF_UNSTABLE is also set by exit_mmap when the OOM reaper shouldn't + * work on the mm anymore. The check for MMF_OOM_UNSTABLE must run * under mmap_sem for reading because it serializes against the * down_write();up_write() cycle in exit_mmap(). */ - if (test_bit(MMF_OOM_SKIP, &mm->flags)) { + if (test_and_set_bit(MMF_UNSTABLE, &mm->flags)) { up_read(&mm->mmap_sem); trace_skip_task_reaping(tsk->pid); goto unlock_oom; @@ -534,14 +539,6 @@ static bool __oom_reap_task_mm(struct ta trace_start_task_reaping(tsk->pid); - /* - * Tell all users of get_user/copy_from_user etc... that the content - * is no longer stable. No barriers really needed because unmapping - * should imply barriers already and the reader would hit a page fault - * if it stumbled over a reaped memory. - */ - set_bit(MMF_UNSTABLE, &mm->flags); - for (vma = mm->mmap ; vma; vma = vma->vm_next) { if (!can_madv_dontneed_vma(vma)) continue; _ Patches currently in -mm which might be from rientjes(a)google.com are mm-oom-fix-concurrent-munlock-and-oom-reaper-unmap.patch

7 years, 2 months

1
0
0 0

[PATCH 3/3] sd_zbc: Avoid that resetting a zone fails sporadically

by Bart Van Assche

Since SCSI scanning occurs asynchronously, since sd_revalidate_disk() is called from sd_probe_async() and since sd_revalidate_disk() calls sd_zbc_read_zones() it can happen that sd_zbc_read_zones() is called concurrently with blkdev_report_zones() and/or blkdev_reset_zones(). That can cause these functions to fail with -EIO because sd_zbc_read_zones() e.g. sets q->nr_zones to zero before restoring it to the actual value, even if no drive characteristics have changed. Avoid that this can happen by making the following changes: - Protect the code that updates zone information with blk_queue_enter() and blk_queue_exit(). - Modify sd_zbc_setup_seq_zones_bitmap() and sd_zbc_setup() such that these functions do not modify struct scsi_disk before all zone information has been obtained. Note: since commit 055f6e18e08f ("block: Make q_usage_counter also track legacy requests"; kernel v4.15) the request queue freezing mechanism also affects legacy request queues. Fixes: 89d947561077 ("sd: Implement support for ZBC devices") Signed-off-by: Bart Van Assche <bart.vanassche(a)wdc.com> Cc: Jens Axboe <axboe(a)kernel.dk> Cc: Damien Le Moal <damien.lemoal(a)wdc.com> Cc: Christoph Hellwig <hch(a)lst.de> Cc: Hannes Reinecke <hare(a)suse.com> Cc: stable(a)vger.kernel.org # v4.10 --- drivers/scsi/sd_zbc.c | 140 +++++++++++++++++++++++++++++-------------------- include/linux/blkdev.h | 5 ++ 2 files changed, 87 insertions(+), 58 deletions(-) diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 2d0c06f7db3e..323e3dc4bc59 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -390,8 +390,10 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf) * * Check that all zones of the device are equal. The last zone can however * be smaller. The zone size must also be a power of two number of LBAs. + * + * Returns the zone size in bytes upon success or an error code upon failure. */ -static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) +static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp) { u64 zone_blocks = 0; sector_t block = 0; @@ -402,8 +404,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) int ret; u8 same; - sdkp->zone_blocks = 0; - /* Get a buffer */ buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); if (!buf) @@ -435,16 +435,17 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) /* Parse zone descriptors */ while (rec < buf + buf_len) { - zone_blocks = get_unaligned_be64(&rec[8]); - if (sdkp->zone_blocks == 0) { - sdkp->zone_blocks = zone_blocks; - } else if (zone_blocks != sdkp->zone_blocks && - (block + zone_blocks < sdkp->capacity - || zone_blocks > sdkp->zone_blocks)) { - zone_blocks = 0; + u64 this_zone_blocks = get_unaligned_be64(&rec[8]); + + if (zone_blocks == 0) { + zone_blocks = this_zone_blocks; + } else if (this_zone_blocks != zone_blocks && + (block + this_zone_blocks < sdkp->capacity + || this_zone_blocks > zone_blocks)) { + this_zone_blocks = 0; goto out; } - block += zone_blocks; + block += this_zone_blocks; rec += 64; } @@ -457,8 +458,6 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) } while (block < sdkp->capacity); - zone_blocks = sdkp->zone_blocks; - out: if (!zone_blocks) { if (sdkp->first_scan) @@ -478,8 +477,7 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) "Zone size too large\n"); ret = -ENODEV; } else { - sdkp->zone_blocks = zone_blocks; - sdkp->zone_shift = ilog2(zone_blocks); + ret = zone_blocks; } out_free: @@ -490,15 +488,14 @@ static int sd_zbc_check_zone_size(struct scsi_disk *sdkp) /** * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone). - * @sdkp: The disk of the bitmap + * @nr_zones: Number of zones to allocate space for. + * @numa_node: NUMA node to allocate the memory from. */ -static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) +static inline unsigned long * +sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node) { - struct request_queue *q = sdkp->disk->queue; - - return kzalloc_node(BITS_TO_LONGS(sdkp->nr_zones) - * sizeof(unsigned long), - GFP_KERNEL, q->node); + return kzalloc_node(BITS_TO_LONGS(nr_zones) * sizeof(unsigned long), + GFP_KERNEL, numa_node); } /** @@ -506,6 +503,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) * @sdkp: disk used * @buf: report reply buffer * @buflen: length of @buf + * @zone_shift: logarithm base 2 of the number of blocks in a zone * @seq_zones_bitmap: bitmap of sequential zones to set * * Parse reported zone descriptors in @buf to identify sequential zones and @@ -515,7 +513,7 @@ static inline unsigned long *sd_zbc_alloc_zone_bitmap(struct scsi_disk *sdkp) * Return the LBA after the last zone reported. */ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, - unsigned int buflen, + unsigned int buflen, u32 zone_shift, unsigned long *seq_zones_bitmap) { sector_t lba, next_lba = sdkp->capacity; @@ -534,7 +532,7 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, if (type != ZBC_ZONE_TYPE_CONV && cond != ZBC_ZONE_COND_READONLY && cond != ZBC_ZONE_COND_OFFLINE) - set_bit(lba >> sdkp->zone_shift, seq_zones_bitmap); + set_bit(lba >> zone_shift, seq_zones_bitmap); next_lba = lba + get_unaligned_be64(&rec[8]); rec += 64; } @@ -543,12 +541,16 @@ static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf, } /** - * sd_zbc_setup_seq_zones_bitmap - Initialize the disk seq zone bitmap. + * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap. * @sdkp: target disk + * @zone_shift: logarithm base 2 of the number of blocks in a zone + * @nr_zones: number of zones to set up a seq zone bitmap for * * Allocate a zone bitmap and initialize it by identifying sequential zones. */ -static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) +static unsigned long * +sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift, + u32 nr_zones) { struct request_queue *q = sdkp->disk->queue; unsigned long *seq_zones_bitmap; @@ -556,9 +558,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) unsigned char *buf; int ret = -ENOMEM; - seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(sdkp); + seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node); if (!seq_zones_bitmap) - return -ENOMEM; + return ERR_PTR(-ENOMEM); buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); if (!buf) @@ -569,7 +571,7 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) if (ret) goto out; lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE, - seq_zones_bitmap); + zone_shift, seq_zones_bitmap); } if (lba != sdkp->capacity) { @@ -581,12 +583,9 @@ static int sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp) kfree(buf); if (ret) { kfree(seq_zones_bitmap); - return ret; + return ERR_PTR(ret); } - - q->seq_zones_bitmap = seq_zones_bitmap; - - return 0; + return seq_zones_bitmap; } static void sd_zbc_cleanup(struct scsi_disk *sdkp) @@ -602,44 +601,64 @@ static void sd_zbc_cleanup(struct scsi_disk *sdkp) q->nr_zones = 0; } -static int sd_zbc_setup(struct scsi_disk *sdkp) +static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks) { struct request_queue *q = sdkp->disk->queue; + u32 zone_shift = ilog2(zone_blocks); + u32 nr_zones; int ret; - /* READ16/WRITE16 is mandatory for ZBC disks */ - sdkp->device->use_16_for_rw = 1; - sdkp->device->use_10_for_rw = 0; - /* chunk_sectors indicates the zone size */ - blk_queue_chunk_sectors(sdkp->disk->queue, - logical_to_sectors(sdkp->device, sdkp->zone_blocks)); - sdkp->nr_zones = - round_up(sdkp->capacity, sdkp->zone_blocks) >> sdkp->zone_shift; + blk_queue_chunk_sectors(q, + logical_to_sectors(sdkp->device, zone_blocks)); + nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift; /* * Initialize the device request queue information if the number * of zones changed. */ - if (sdkp->nr_zones != q->nr_zones) { - - sd_zbc_cleanup(sdkp); - - q->nr_zones = sdkp->nr_zones; - if (sdkp->nr_zones) { - q->seq_zones_wlock = sd_zbc_alloc_zone_bitmap(sdkp); - if (!q->seq_zones_wlock) { + if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) { + unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; + size_t zone_bitmap_size; + + if (nr_zones) { + seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones, + q->node); + if (!seq_zones_wlock) { ret = -ENOMEM; goto err; } - ret = sd_zbc_setup_seq_zones_bitmap(sdkp); - if (ret) { - sd_zbc_cleanup(sdkp); + seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp, + zone_shift, nr_zones); + if (IS_ERR(seq_zones_bitmap)) { + ret = PTR_ERR(seq_zones_bitmap); + kfree(seq_zones_wlock); goto err; } } - + zone_bitmap_size = BITS_TO_LONGS(nr_zones) * + sizeof(unsigned long); + blk_mq_freeze_queue(q); + if (q->nr_zones != nr_zones) { + /* READ16/WRITE16 is mandatory for ZBC disks */ + sdkp->device->use_16_for_rw = 1; + sdkp->device->use_10_for_rw = 0; + + sdkp->zone_blocks = zone_blocks; + sdkp->zone_shift = zone_shift; + sdkp->nr_zones = nr_zones; + q->nr_zones = nr_zones; + swap(q->seq_zones_wlock, seq_zones_wlock); + swap(q->seq_zones_bitmap, seq_zones_bitmap); + } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap, + zone_bitmap_size) != 0) { + memcpy(q->seq_zones_bitmap, seq_zones_bitmap, + zone_bitmap_size); + } + blk_mq_unfreeze_queue(q); + kfree(seq_zones_wlock); + kfree(seq_zones_bitmap); } return 0; @@ -651,6 +670,7 @@ static int sd_zbc_setup(struct scsi_disk *sdkp) int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) { + int64_t zone_blocks; int ret; if (!sd_is_zoned(sdkp)) @@ -687,12 +707,16 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf) * Check zone size: only devices with a constant zone size (except * an eventual last runt zone) that is a power of 2 are supported. */ - ret = sd_zbc_check_zone_size(sdkp); - if (ret) + zone_blocks = sd_zbc_check_zone_size(sdkp); + ret = -EFBIG; + if (zone_blocks != (u32)zone_blocks) + goto err; + ret = zone_blocks; + if (ret < 0) goto err; /* The drive satisfies the kernel restrictions: set it up */ - ret = sd_zbc_setup(sdkp); + ret = sd_zbc_setup(sdkp, zone_blocks); if (ret) goto err; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9af3e0f430bc..21e21f273a21 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -605,6 +605,11 @@ struct request_queue { * initialized by the low level device driver (e.g. scsi/sd.c). * Stacking drivers (device mappers) may or may not initialize * these fields. + * + * Reads of this information must be protected with blk_queue_enter() / + * blk_queue_exit(). Modifying this information is only allowed while + * no requests are being processed. See also blk_mq_freeze_queue() and + * blk_mq_unfreeze_queue(). */ unsigned int nr_zones; unsigned long *seq_zones_bitmap; -- 2.16.3

7 years, 2 months

2
1
0 0

[PATCH 1/6] cifs: smbd: Check for iov length on sending the last iov

by Long Li

From: Long Li <longli(a)microsoft.com> When sending the last iov that breaks into smaller buffers to fit the transfer size, it's necessary to check if this is the last iov. If this is the latest iov, stop and proceed to send pages. Signed-off-by: Long Li <longli(a)microsoft.com> --- fs/cifs/smbdirect.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 90e673c..b5c6c0d 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -2197,6 +2197,8 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) goto done; } i++; + if (i == rqst->rq_nvec) + break; } start = i; buflen = 0; -- 2.7.4

7 years, 2 months

5
14
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror April 2018