- Linux-stable-mirror - lists.linaro.org

FAILED: patch "[PATCH] rbd: decouple header read-in from updating rbd_dev->header" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 510a7330c82a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100419-skinless-reformer-a59b@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 510a7330c82a7754d5df0117a8589e8a539067c7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov <idryomov(a)gmail.com> Date: Tue, 19 Sep 2023 20:41:47 +0200 Subject: [PATCH] rbd: decouple header read-in from updating rbd_dev->header Make rbd_dev_header_info() populate a passed struct rbd_image_header instead of rbd_dev->header and introduce rbd_dev_update_header() for updating mutable fields in rbd_dev->header upon refresh. The initial read-in of both mutable and immutable fields in rbd_dev_image_probe() passes in rbd_dev->header so no update step is required there. rbd_init_layout() is now called directly from rbd_dev_image_probe() instead of individually in format 1 and format 2 implementations. Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com> Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5da001f1fe94..6ed5520ef303 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -632,7 +632,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -993,15 +994,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev) RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); } +static void rbd_image_header_cleanup(struct rbd_image_header *header) +{ + kfree(header->object_prefix); + ceph_put_snap_context(header->snapc); + kfree(header->snap_sizes); + kfree(header->snap_names); + + memset(header, 0, sizeof(*header)); +} + /* * Fill an rbd image header with information from the given format 1 * on-disk header. */ -static int rbd_header_from_disk(struct rbd_device *rbd_dev, - struct rbd_image_header_ondisk *ondisk) +static int rbd_header_from_disk(struct rbd_image_header *header, + struct rbd_image_header_ondisk *ondisk, + bool first_time) { - struct rbd_image_header *header = &rbd_dev->header; - bool first_time = header->object_prefix == NULL; struct ceph_snap_context *snapc; char *object_prefix = NULL; char *snap_names = NULL; @@ -1068,11 +1078,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, if (first_time) { header->object_prefix = object_prefix; header->obj_order = ondisk->options.order; - rbd_init_layout(rbd_dev); - } else { - ceph_put_snap_context(header->snapc); - kfree(header->snap_names); - kfree(header->snap_sizes); } /* The remaining fields always get updated (when we refresh) */ @@ -4857,7 +4862,9 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, * return, the rbd_dev->header field will contain up-to-date * information about the image. */ -static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { struct rbd_image_header_ondisk *ondisk = NULL; u32 snap_count = 0; @@ -4905,7 +4912,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) snap_count = le32_to_cpu(ondisk->snap_count); } while (snap_count != want_count); - ret = rbd_header_from_disk(rbd_dev, ondisk); + ret = rbd_header_from_disk(header, ondisk, first_time); out: kfree(ondisk); @@ -5468,17 +5475,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, - &rbd_dev->header.obj_order, - &rbd_dev->header.image_size); -} - -static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) +static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev, + char **pobject_prefix) { size_t size; void *reply_buf; + char *object_prefix; int ret; void *p; @@ -5496,16 +5498,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) goto out; p = reply_buf; - rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, - p + ret, NULL, GFP_NOIO); + object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL, + GFP_NOIO); + if (IS_ERR(object_prefix)) { + ret = PTR_ERR(object_prefix); + goto out; + } ret = 0; - if (IS_ERR(rbd_dev->header.object_prefix)) { - ret = PTR_ERR(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } else { - dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); - } + *pobject_prefix = object_prefix; + dout(" object_prefix = %s\n", object_prefix); out: kfree(reply_buf); @@ -5556,13 +5558,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_dev_v2_features(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, - rbd_is_ro(rbd_dev), - &rbd_dev->header.features); -} - /* * These are generic image flags, but since they are used only for * object map, store them in rbd_dev->object_map_flags. @@ -5837,14 +5832,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) return ret; } -static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev, + u64 *stripe_unit, u64 *stripe_count) { struct { __le64 stripe_unit; __le64 stripe_count; } __attribute__ ((packed)) striping_info_buf = { 0 }; size_t size = sizeof (striping_info_buf); - void *p; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, @@ -5856,27 +5851,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) if (ret < size) return -ERANGE; - p = &striping_info_buf; - rbd_dev->header.stripe_unit = ceph_decode_64(&p); - rbd_dev->header.stripe_count = ceph_decode_64(&p); + *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit); + *stripe_count = le64_to_cpu(striping_info_buf.stripe_count); + dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit, + *stripe_count); + return 0; } -static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev) +static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id) { - __le64 data_pool_id; + __le64 data_pool_buf; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, &rbd_dev->header_oloc, "get_data_pool", - NULL, 0, &data_pool_id, sizeof(data_pool_id)); + NULL, 0, &data_pool_buf, + sizeof(data_pool_buf)); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; - if (ret < sizeof(data_pool_id)) + if (ret < sizeof(data_pool_buf)) return -EBADMSG; - rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id); - WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL); + *data_pool_id = le64_to_cpu(data_pool_buf); + dout(" data_pool_id = %lld\n", *data_pool_id); + WARN_ON(*data_pool_id == CEPH_NOPOOL); + return 0; } @@ -6068,7 +6069,8 @@ static int rbd_spec_fill_names(struct rbd_device *rbd_dev) return ret; } -static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) +static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, + struct ceph_snap_context **psnapc) { size_t size; int ret; @@ -6129,9 +6131,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) for (i = 0; i < snap_count; i++) snapc->snaps[i] = ceph_decode_64(&p); - ceph_put_snap_context(rbd_dev->header.snapc); - rbd_dev->header.snapc = snapc; - + *psnapc = snapc; dout(" snap context seq = %llu, snap_count = %u\n", (unsigned long long)seq, (unsigned int)snap_count); out: @@ -6180,38 +6180,42 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, return snap_name; } -static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { - bool first_time = rbd_dev->header.object_prefix == NULL; int ret; - ret = rbd_dev_v2_image_size(rbd_dev); + ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, + first_time ? &header->obj_order : NULL, + &header->image_size); if (ret) return ret; if (first_time) { - ret = rbd_dev_v2_header_onetime(rbd_dev); + ret = rbd_dev_v2_header_onetime(rbd_dev, header); if (ret) return ret; } - ret = rbd_dev_v2_snap_context(rbd_dev); - if (ret && first_time) { - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } + ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc); + if (ret) + return ret; - return ret; + return 0; } -static int rbd_dev_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(!header->object_prefix && !header->snapc); if (rbd_dev->image_format == 1) - return rbd_dev_v1_header_info(rbd_dev); + return rbd_dev_v1_header_info(rbd_dev, header, first_time); - return rbd_dev_v2_header_info(rbd_dev); + return rbd_dev_v2_header_info(rbd_dev, header, first_time); } /* @@ -6699,60 +6703,49 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) */ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) { - struct rbd_image_header *header; - rbd_dev_parent_put(rbd_dev); rbd_object_map_free(rbd_dev); rbd_dev_mapping_clear(rbd_dev); /* Free dynamic fields from the header, then zero it out */ - header = &rbd_dev->header; - ceph_put_snap_context(header->snapc); - kfree(header->snap_sizes); - kfree(header->snap_names); - kfree(header->object_prefix); - memset(header, 0, sizeof (*header)); + rbd_image_header_cleanup(&rbd_dev->header); } -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header) { int ret; - ret = rbd_dev_v2_object_prefix(rbd_dev); + ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix); if (ret) - goto out_err; + return ret; /* * Get the and check features for the image. Currently the * features are assumed to never change. */ - ret = rbd_dev_v2_features(rbd_dev); + ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, + rbd_is_ro(rbd_dev), &header->features); if (ret) - goto out_err; + return ret; /* If the image supports fancy striping, get its parameters */ - if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { - ret = rbd_dev_v2_striping_info(rbd_dev); - if (ret < 0) - goto out_err; - } - - if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) { - ret = rbd_dev_v2_data_pool(rbd_dev); + if (header->features & RBD_FEATURE_STRIPINGV2) { + ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit, + &header->stripe_count); if (ret) - goto out_err; + return ret; } - rbd_init_layout(rbd_dev); - return 0; + if (header->features & RBD_FEATURE_DATA_POOL) { + ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id); + if (ret) + return ret; + } -out_err: - rbd_dev->header.features = 0; - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - return ret; + return 0; } /* @@ -6947,13 +6940,15 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) if (!depth) down_write(&rbd_dev->header_rwsem); - ret = rbd_dev_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true); if (ret) { if (ret == -ENOENT && !need_watch) rbd_print_dne(rbd_dev, false); goto err_out_probe; } + rbd_init_layout(rbd_dev); + /* * If this image is the one being mapped, we have pool name and * id, image name and id, and snap name - need to fill snap id. @@ -7008,15 +7003,39 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) return ret; } +static void rbd_dev_update_header(struct rbd_device *rbd_dev, + struct rbd_image_header *header) +{ + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(rbd_dev->header.object_prefix); /* !first_time */ + + rbd_dev->header.image_size = header->image_size; + + ceph_put_snap_context(rbd_dev->header.snapc); + rbd_dev->header.snapc = header->snapc; + header->snapc = NULL; + + if (rbd_dev->image_format == 1) { + kfree(rbd_dev->header.snap_names); + rbd_dev->header.snap_names = header->snap_names; + header->snap_names = NULL; + + kfree(rbd_dev->header.snap_sizes); + rbd_dev->header.snap_sizes = header->snap_sizes; + header->snap_sizes = NULL; + } +} + static int rbd_dev_refresh(struct rbd_device *rbd_dev) { + struct rbd_image_header header = { 0 }; u64 mapping_size; int ret; down_write(&rbd_dev->header_rwsem); mapping_size = rbd_dev->mapping.size; - ret = rbd_dev_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev, &header, false); if (ret) goto out; @@ -7030,6 +7049,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) goto out; } + rbd_dev_update_header(rbd_dev, &header); + rbd_assert(!rbd_is_snap(rbd_dev)); rbd_dev->mapping.size = rbd_dev->header.image_size; @@ -7038,6 +7059,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) if (!ret && mapping_size != rbd_dev->mapping.size) rbd_dev_update_size(rbd_dev); + rbd_image_header_cleanup(&header); return ret; }

2 years

1
0
0 0

FAILED: patch "[PATCH] rbd: decouple header read-in from updating rbd_dev->header" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 510a7330c82a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100420-registry-crux-4567@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 510a7330c82a7754d5df0117a8589e8a539067c7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov <idryomov(a)gmail.com> Date: Tue, 19 Sep 2023 20:41:47 +0200 Subject: [PATCH] rbd: decouple header read-in from updating rbd_dev->header Make rbd_dev_header_info() populate a passed struct rbd_image_header instead of rbd_dev->header and introduce rbd_dev_update_header() for updating mutable fields in rbd_dev->header upon refresh. The initial read-in of both mutable and immutable fields in rbd_dev_image_probe() passes in rbd_dev->header so no update step is required there. rbd_init_layout() is now called directly from rbd_dev_image_probe() instead of individually in format 1 and format 2 implementations. Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com> Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5da001f1fe94..6ed5520ef303 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -632,7 +632,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -993,15 +994,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev) RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); } +static void rbd_image_header_cleanup(struct rbd_image_header *header) +{ + kfree(header->object_prefix); + ceph_put_snap_context(header->snapc); + kfree(header->snap_sizes); + kfree(header->snap_names); + + memset(header, 0, sizeof(*header)); +} + /* * Fill an rbd image header with information from the given format 1 * on-disk header. */ -static int rbd_header_from_disk(struct rbd_device *rbd_dev, - struct rbd_image_header_ondisk *ondisk) +static int rbd_header_from_disk(struct rbd_image_header *header, + struct rbd_image_header_ondisk *ondisk, + bool first_time) { - struct rbd_image_header *header = &rbd_dev->header; - bool first_time = header->object_prefix == NULL; struct ceph_snap_context *snapc; char *object_prefix = NULL; char *snap_names = NULL; @@ -1068,11 +1078,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, if (first_time) { header->object_prefix = object_prefix; header->obj_order = ondisk->options.order; - rbd_init_layout(rbd_dev); - } else { - ceph_put_snap_context(header->snapc); - kfree(header->snap_names); - kfree(header->snap_sizes); } /* The remaining fields always get updated (when we refresh) */ @@ -4857,7 +4862,9 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, * return, the rbd_dev->header field will contain up-to-date * information about the image. */ -static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { struct rbd_image_header_ondisk *ondisk = NULL; u32 snap_count = 0; @@ -4905,7 +4912,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) snap_count = le32_to_cpu(ondisk->snap_count); } while (snap_count != want_count); - ret = rbd_header_from_disk(rbd_dev, ondisk); + ret = rbd_header_from_disk(header, ondisk, first_time); out: kfree(ondisk); @@ -5468,17 +5475,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, - &rbd_dev->header.obj_order, - &rbd_dev->header.image_size); -} - -static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) +static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev, + char **pobject_prefix) { size_t size; void *reply_buf; + char *object_prefix; int ret; void *p; @@ -5496,16 +5498,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) goto out; p = reply_buf; - rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, - p + ret, NULL, GFP_NOIO); + object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL, + GFP_NOIO); + if (IS_ERR(object_prefix)) { + ret = PTR_ERR(object_prefix); + goto out; + } ret = 0; - if (IS_ERR(rbd_dev->header.object_prefix)) { - ret = PTR_ERR(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } else { - dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); - } + *pobject_prefix = object_prefix; + dout(" object_prefix = %s\n", object_prefix); out: kfree(reply_buf); @@ -5556,13 +5558,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_dev_v2_features(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, - rbd_is_ro(rbd_dev), - &rbd_dev->header.features); -} - /* * These are generic image flags, but since they are used only for * object map, store them in rbd_dev->object_map_flags. @@ -5837,14 +5832,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) return ret; } -static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev, + u64 *stripe_unit, u64 *stripe_count) { struct { __le64 stripe_unit; __le64 stripe_count; } __attribute__ ((packed)) striping_info_buf = { 0 }; size_t size = sizeof (striping_info_buf); - void *p; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, @@ -5856,27 +5851,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) if (ret < size) return -ERANGE; - p = &striping_info_buf; - rbd_dev->header.stripe_unit = ceph_decode_64(&p); - rbd_dev->header.stripe_count = ceph_decode_64(&p); + *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit); + *stripe_count = le64_to_cpu(striping_info_buf.stripe_count); + dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit, + *stripe_count); + return 0; } -static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev) +static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id) { - __le64 data_pool_id; + __le64 data_pool_buf; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, &rbd_dev->header_oloc, "get_data_pool", - NULL, 0, &data_pool_id, sizeof(data_pool_id)); + NULL, 0, &data_pool_buf, + sizeof(data_pool_buf)); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; - if (ret < sizeof(data_pool_id)) + if (ret < sizeof(data_pool_buf)) return -EBADMSG; - rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id); - WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL); + *data_pool_id = le64_to_cpu(data_pool_buf); + dout(" data_pool_id = %lld\n", *data_pool_id); + WARN_ON(*data_pool_id == CEPH_NOPOOL); + return 0; } @@ -6068,7 +6069,8 @@ static int rbd_spec_fill_names(struct rbd_device *rbd_dev) return ret; } -static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) +static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, + struct ceph_snap_context **psnapc) { size_t size; int ret; @@ -6129,9 +6131,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) for (i = 0; i < snap_count; i++) snapc->snaps[i] = ceph_decode_64(&p); - ceph_put_snap_context(rbd_dev->header.snapc); - rbd_dev->header.snapc = snapc; - + *psnapc = snapc; dout(" snap context seq = %llu, snap_count = %u\n", (unsigned long long)seq, (unsigned int)snap_count); out: @@ -6180,38 +6180,42 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, return snap_name; } -static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { - bool first_time = rbd_dev->header.object_prefix == NULL; int ret; - ret = rbd_dev_v2_image_size(rbd_dev); + ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, + first_time ? &header->obj_order : NULL, + &header->image_size); if (ret) return ret; if (first_time) { - ret = rbd_dev_v2_header_onetime(rbd_dev); + ret = rbd_dev_v2_header_onetime(rbd_dev, header); if (ret) return ret; } - ret = rbd_dev_v2_snap_context(rbd_dev); - if (ret && first_time) { - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } + ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc); + if (ret) + return ret; - return ret; + return 0; } -static int rbd_dev_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(!header->object_prefix && !header->snapc); if (rbd_dev->image_format == 1) - return rbd_dev_v1_header_info(rbd_dev); + return rbd_dev_v1_header_info(rbd_dev, header, first_time); - return rbd_dev_v2_header_info(rbd_dev); + return rbd_dev_v2_header_info(rbd_dev, header, first_time); } /* @@ -6699,60 +6703,49 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) */ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) { - struct rbd_image_header *header; - rbd_dev_parent_put(rbd_dev); rbd_object_map_free(rbd_dev); rbd_dev_mapping_clear(rbd_dev); /* Free dynamic fields from the header, then zero it out */ - header = &rbd_dev->header; - ceph_put_snap_context(header->snapc); - kfree(header->snap_sizes); - kfree(header->snap_names); - kfree(header->object_prefix); - memset(header, 0, sizeof (*header)); + rbd_image_header_cleanup(&rbd_dev->header); } -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header) { int ret; - ret = rbd_dev_v2_object_prefix(rbd_dev); + ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix); if (ret) - goto out_err; + return ret; /* * Get the and check features for the image. Currently the * features are assumed to never change. */ - ret = rbd_dev_v2_features(rbd_dev); + ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, + rbd_is_ro(rbd_dev), &header->features); if (ret) - goto out_err; + return ret; /* If the image supports fancy striping, get its parameters */ - if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { - ret = rbd_dev_v2_striping_info(rbd_dev); - if (ret < 0) - goto out_err; - } - - if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) { - ret = rbd_dev_v2_data_pool(rbd_dev); + if (header->features & RBD_FEATURE_STRIPINGV2) { + ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit, + &header->stripe_count); if (ret) - goto out_err; + return ret; } - rbd_init_layout(rbd_dev); - return 0; + if (header->features & RBD_FEATURE_DATA_POOL) { + ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id); + if (ret) + return ret; + } -out_err: - rbd_dev->header.features = 0; - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - return ret; + return 0; } /* @@ -6947,13 +6940,15 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) if (!depth) down_write(&rbd_dev->header_rwsem); - ret = rbd_dev_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true); if (ret) { if (ret == -ENOENT && !need_watch) rbd_print_dne(rbd_dev, false); goto err_out_probe; } + rbd_init_layout(rbd_dev); + /* * If this image is the one being mapped, we have pool name and * id, image name and id, and snap name - need to fill snap id. @@ -7008,15 +7003,39 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) return ret; } +static void rbd_dev_update_header(struct rbd_device *rbd_dev, + struct rbd_image_header *header) +{ + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(rbd_dev->header.object_prefix); /* !first_time */ + + rbd_dev->header.image_size = header->image_size; + + ceph_put_snap_context(rbd_dev->header.snapc); + rbd_dev->header.snapc = header->snapc; + header->snapc = NULL; + + if (rbd_dev->image_format == 1) { + kfree(rbd_dev->header.snap_names); + rbd_dev->header.snap_names = header->snap_names; + header->snap_names = NULL; + + kfree(rbd_dev->header.snap_sizes); + rbd_dev->header.snap_sizes = header->snap_sizes; + header->snap_sizes = NULL; + } +} + static int rbd_dev_refresh(struct rbd_device *rbd_dev) { + struct rbd_image_header header = { 0 }; u64 mapping_size; int ret; down_write(&rbd_dev->header_rwsem); mapping_size = rbd_dev->mapping.size; - ret = rbd_dev_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev, &header, false); if (ret) goto out; @@ -7030,6 +7049,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) goto out; } + rbd_dev_update_header(rbd_dev, &header); + rbd_assert(!rbd_is_snap(rbd_dev)); rbd_dev->mapping.size = rbd_dev->header.image_size; @@ -7038,6 +7059,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) if (!ret && mapping_size != rbd_dev->mapping.size) rbd_dev_update_size(rbd_dev); + rbd_image_header_cleanup(&header); return ret; }

2 years

1
0
0 0

FAILED: patch "[PATCH] rbd: decouple header read-in from updating rbd_dev->header" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 510a7330c82a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100418-shriek-sandpaper-186f@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 510a7330c82a7754d5df0117a8589e8a539067c7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov <idryomov(a)gmail.com> Date: Tue, 19 Sep 2023 20:41:47 +0200 Subject: [PATCH] rbd: decouple header read-in from updating rbd_dev->header Make rbd_dev_header_info() populate a passed struct rbd_image_header instead of rbd_dev->header and introduce rbd_dev_update_header() for updating mutable fields in rbd_dev->header upon refresh. The initial read-in of both mutable and immutable fields in rbd_dev_image_probe() passes in rbd_dev->header so no update step is required there. rbd_init_layout() is now called directly from rbd_dev_image_probe() instead of individually in format 1 and format 2 implementations. Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com> Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 5da001f1fe94..6ed5520ef303 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -632,7 +632,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -993,15 +994,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev) RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL); } +static void rbd_image_header_cleanup(struct rbd_image_header *header) +{ + kfree(header->object_prefix); + ceph_put_snap_context(header->snapc); + kfree(header->snap_sizes); + kfree(header->snap_names); + + memset(header, 0, sizeof(*header)); +} + /* * Fill an rbd image header with information from the given format 1 * on-disk header. */ -static int rbd_header_from_disk(struct rbd_device *rbd_dev, - struct rbd_image_header_ondisk *ondisk) +static int rbd_header_from_disk(struct rbd_image_header *header, + struct rbd_image_header_ondisk *ondisk, + bool first_time) { - struct rbd_image_header *header = &rbd_dev->header; - bool first_time = header->object_prefix == NULL; struct ceph_snap_context *snapc; char *object_prefix = NULL; char *snap_names = NULL; @@ -1068,11 +1078,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, if (first_time) { header->object_prefix = object_prefix; header->obj_order = ondisk->options.order; - rbd_init_layout(rbd_dev); - } else { - ceph_put_snap_context(header->snapc); - kfree(header->snap_names); - kfree(header->snap_sizes); } /* The remaining fields always get updated (when we refresh) */ @@ -4857,7 +4862,9 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, * return, the rbd_dev->header field will contain up-to-date * information about the image. */ -static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { struct rbd_image_header_ondisk *ondisk = NULL; u32 snap_count = 0; @@ -4905,7 +4912,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) snap_count = le32_to_cpu(ondisk->snap_count); } while (snap_count != want_count); - ret = rbd_header_from_disk(rbd_dev, ondisk); + ret = rbd_header_from_disk(header, ondisk, first_time); out: kfree(ondisk); @@ -5468,17 +5475,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, - &rbd_dev->header.obj_order, - &rbd_dev->header.image_size); -} - -static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) +static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev, + char **pobject_prefix) { size_t size; void *reply_buf; + char *object_prefix; int ret; void *p; @@ -5496,16 +5498,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) goto out; p = reply_buf; - rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, - p + ret, NULL, GFP_NOIO); + object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL, + GFP_NOIO); + if (IS_ERR(object_prefix)) { + ret = PTR_ERR(object_prefix); + goto out; + } ret = 0; - if (IS_ERR(rbd_dev->header.object_prefix)) { - ret = PTR_ERR(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } else { - dout(" object_prefix = %s\n", rbd_dev->header.object_prefix); - } + *pobject_prefix = object_prefix; + dout(" object_prefix = %s\n", object_prefix); out: kfree(reply_buf); @@ -5556,13 +5558,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, return 0; } -static int rbd_dev_v2_features(struct rbd_device *rbd_dev) -{ - return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, - rbd_is_ro(rbd_dev), - &rbd_dev->header.features); -} - /* * These are generic image flags, but since they are used only for * object map, store them in rbd_dev->object_map_flags. @@ -5837,14 +5832,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) return ret; } -static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev, + u64 *stripe_unit, u64 *stripe_count) { struct { __le64 stripe_unit; __le64 stripe_count; } __attribute__ ((packed)) striping_info_buf = { 0 }; size_t size = sizeof (striping_info_buf); - void *p; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, @@ -5856,27 +5851,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev) if (ret < size) return -ERANGE; - p = &striping_info_buf; - rbd_dev->header.stripe_unit = ceph_decode_64(&p); - rbd_dev->header.stripe_count = ceph_decode_64(&p); + *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit); + *stripe_count = le64_to_cpu(striping_info_buf.stripe_count); + dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit, + *stripe_count); + return 0; } -static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev) +static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id) { - __le64 data_pool_id; + __le64 data_pool_buf; int ret; ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid, &rbd_dev->header_oloc, "get_data_pool", - NULL, 0, &data_pool_id, sizeof(data_pool_id)); + NULL, 0, &data_pool_buf, + sizeof(data_pool_buf)); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; - if (ret < sizeof(data_pool_id)) + if (ret < sizeof(data_pool_buf)) return -EBADMSG; - rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id); - WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL); + *data_pool_id = le64_to_cpu(data_pool_buf); + dout(" data_pool_id = %lld\n", *data_pool_id); + WARN_ON(*data_pool_id == CEPH_NOPOOL); + return 0; } @@ -6068,7 +6069,8 @@ static int rbd_spec_fill_names(struct rbd_device *rbd_dev) return ret; } -static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) +static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, + struct ceph_snap_context **psnapc) { size_t size; int ret; @@ -6129,9 +6131,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) for (i = 0; i < snap_count; i++) snapc->snaps[i] = ceph_decode_64(&p); - ceph_put_snap_context(rbd_dev->header.snapc); - rbd_dev->header.snapc = snapc; - + *psnapc = snapc; dout(" snap context seq = %llu, snap_count = %u\n", (unsigned long long)seq, (unsigned int)snap_count); out: @@ -6180,38 +6180,42 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, return snap_name; } -static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { - bool first_time = rbd_dev->header.object_prefix == NULL; int ret; - ret = rbd_dev_v2_image_size(rbd_dev); + ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP, + first_time ? &header->obj_order : NULL, + &header->image_size); if (ret) return ret; if (first_time) { - ret = rbd_dev_v2_header_onetime(rbd_dev); + ret = rbd_dev_v2_header_onetime(rbd_dev, header); if (ret) return ret; } - ret = rbd_dev_v2_snap_context(rbd_dev); - if (ret && first_time) { - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - } + ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc); + if (ret) + return ret; - return ret; + return 0; } -static int rbd_dev_header_info(struct rbd_device *rbd_dev) +static int rbd_dev_header_info(struct rbd_device *rbd_dev, + struct rbd_image_header *header, + bool first_time) { rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(!header->object_prefix && !header->snapc); if (rbd_dev->image_format == 1) - return rbd_dev_v1_header_info(rbd_dev); + return rbd_dev_v1_header_info(rbd_dev, header, first_time); - return rbd_dev_v2_header_info(rbd_dev); + return rbd_dev_v2_header_info(rbd_dev, header, first_time); } /* @@ -6699,60 +6703,49 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) */ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) { - struct rbd_image_header *header; - rbd_dev_parent_put(rbd_dev); rbd_object_map_free(rbd_dev); rbd_dev_mapping_clear(rbd_dev); /* Free dynamic fields from the header, then zero it out */ - header = &rbd_dev->header; - ceph_put_snap_context(header->snapc); - kfree(header->snap_sizes); - kfree(header->snap_names); - kfree(header->object_prefix); - memset(header, 0, sizeof (*header)); + rbd_image_header_cleanup(&rbd_dev->header); } -static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) +static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev, + struct rbd_image_header *header) { int ret; - ret = rbd_dev_v2_object_prefix(rbd_dev); + ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix); if (ret) - goto out_err; + return ret; /* * Get the and check features for the image. Currently the * features are assumed to never change. */ - ret = rbd_dev_v2_features(rbd_dev); + ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP, + rbd_is_ro(rbd_dev), &header->features); if (ret) - goto out_err; + return ret; /* If the image supports fancy striping, get its parameters */ - if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { - ret = rbd_dev_v2_striping_info(rbd_dev); - if (ret < 0) - goto out_err; - } - - if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) { - ret = rbd_dev_v2_data_pool(rbd_dev); + if (header->features & RBD_FEATURE_STRIPINGV2) { + ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit, + &header->stripe_count); if (ret) - goto out_err; + return ret; } - rbd_init_layout(rbd_dev); - return 0; + if (header->features & RBD_FEATURE_DATA_POOL) { + ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id); + if (ret) + return ret; + } -out_err: - rbd_dev->header.features = 0; - kfree(rbd_dev->header.object_prefix); - rbd_dev->header.object_prefix = NULL; - return ret; + return 0; } /* @@ -6947,13 +6940,15 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) if (!depth) down_write(&rbd_dev->header_rwsem); - ret = rbd_dev_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true); if (ret) { if (ret == -ENOENT && !need_watch) rbd_print_dne(rbd_dev, false); goto err_out_probe; } + rbd_init_layout(rbd_dev); + /* * If this image is the one being mapped, we have pool name and * id, image name and id, and snap name - need to fill snap id. @@ -7008,15 +7003,39 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) return ret; } +static void rbd_dev_update_header(struct rbd_device *rbd_dev, + struct rbd_image_header *header) +{ + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + rbd_assert(rbd_dev->header.object_prefix); /* !first_time */ + + rbd_dev->header.image_size = header->image_size; + + ceph_put_snap_context(rbd_dev->header.snapc); + rbd_dev->header.snapc = header->snapc; + header->snapc = NULL; + + if (rbd_dev->image_format == 1) { + kfree(rbd_dev->header.snap_names); + rbd_dev->header.snap_names = header->snap_names; + header->snap_names = NULL; + + kfree(rbd_dev->header.snap_sizes); + rbd_dev->header.snap_sizes = header->snap_sizes; + header->snap_sizes = NULL; + } +} + static int rbd_dev_refresh(struct rbd_device *rbd_dev) { + struct rbd_image_header header = { 0 }; u64 mapping_size; int ret; down_write(&rbd_dev->header_rwsem); mapping_size = rbd_dev->mapping.size; - ret = rbd_dev_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev, &header, false); if (ret) goto out; @@ -7030,6 +7049,8 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) goto out; } + rbd_dev_update_header(rbd_dev, &header); + rbd_assert(!rbd_is_snap(rbd_dev)); rbd_dev->mapping.size = rbd_dev->header.image_size; @@ -7038,6 +7059,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) if (!ret && mapping_size != rbd_dev->mapping.size) rbd_dev_update_size(rbd_dev); + rbd_image_header_cleanup(&header); return ret; }

2 years

1
0
0 0

FAILED: patch "[PATCH] rbd: move rbd_dev_refresh() definition" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 0b035401c570 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100403-vastly-take-591f@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 0b035401c57021fc6c300272cbb1c5a889d4fe45 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov <idryomov(a)gmail.com> Date: Sun, 17 Sep 2023 15:07:40 +0200 Subject: [PATCH] rbd: move rbd_dev_refresh() definition Move rbd_dev_refresh() definition further down to avoid having to move struct parent_image_info definition in the next commit. This spares some forward declarations too. Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com> Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3de11f077144..5da001f1fe94 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -633,8 +633,6 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); -static int rbd_dev_header_info(struct rbd_device *rbd_dev); -static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -4931,39 +4929,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) } } -static int rbd_dev_refresh(struct rbd_device *rbd_dev) -{ - u64 mapping_size; - int ret; - - down_write(&rbd_dev->header_rwsem); - mapping_size = rbd_dev->mapping.size; - - ret = rbd_dev_header_info(rbd_dev); - if (ret) - goto out; - - /* - * If there is a parent, see if it has disappeared due to the - * mapped image getting flattened. - */ - if (rbd_dev->parent) { - ret = rbd_dev_v2_parent_info(rbd_dev); - if (ret) - goto out; - } - - rbd_assert(!rbd_is_snap(rbd_dev)); - rbd_dev->mapping.size = rbd_dev->header.image_size; - -out: - up_write(&rbd_dev->header_rwsem); - if (!ret && mapping_size != rbd_dev->mapping.size) - rbd_dev_update_size(rbd_dev); - - return ret; -} - static const struct blk_mq_ops rbd_mq_ops = { .queue_rq = rbd_queue_rq, }; @@ -7043,6 +7008,39 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) return ret; } +static int rbd_dev_refresh(struct rbd_device *rbd_dev) +{ + u64 mapping_size; + int ret; + + down_write(&rbd_dev->header_rwsem); + mapping_size = rbd_dev->mapping.size; + + ret = rbd_dev_header_info(rbd_dev); + if (ret) + goto out; + + /* + * If there is a parent, see if it has disappeared due to the + * mapped image getting flattened. + */ + if (rbd_dev->parent) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret) + goto out; + } + + rbd_assert(!rbd_is_snap(rbd_dev)); + rbd_dev->mapping.size = rbd_dev->header.image_size; + +out: + up_write(&rbd_dev->header_rwsem); + if (!ret && mapping_size != rbd_dev->mapping.size) + rbd_dev_update_size(rbd_dev); + + return ret; +} + static ssize_t do_rbd_add(const char *buf, size_t count) { struct rbd_device *rbd_dev = NULL;

2 years

1
0
0 0

FAILED: patch "[PATCH] rbd: move rbd_dev_refresh() definition" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 0b035401c570 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100402-cobbler-happy-ee24@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 0b035401c57021fc6c300272cbb1c5a889d4fe45 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov <idryomov(a)gmail.com> Date: Sun, 17 Sep 2023 15:07:40 +0200 Subject: [PATCH] rbd: move rbd_dev_refresh() definition Move rbd_dev_refresh() definition further down to avoid having to move struct parent_image_info definition in the next commit. This spares some forward declarations too. Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com> Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3de11f077144..5da001f1fe94 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -633,8 +633,6 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); -static int rbd_dev_header_info(struct rbd_device *rbd_dev); -static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -4931,39 +4929,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) } } -static int rbd_dev_refresh(struct rbd_device *rbd_dev) -{ - u64 mapping_size; - int ret; - - down_write(&rbd_dev->header_rwsem); - mapping_size = rbd_dev->mapping.size; - - ret = rbd_dev_header_info(rbd_dev); - if (ret) - goto out; - - /* - * If there is a parent, see if it has disappeared due to the - * mapped image getting flattened. - */ - if (rbd_dev->parent) { - ret = rbd_dev_v2_parent_info(rbd_dev); - if (ret) - goto out; - } - - rbd_assert(!rbd_is_snap(rbd_dev)); - rbd_dev->mapping.size = rbd_dev->header.image_size; - -out: - up_write(&rbd_dev->header_rwsem); - if (!ret && mapping_size != rbd_dev->mapping.size) - rbd_dev_update_size(rbd_dev); - - return ret; -} - static const struct blk_mq_ops rbd_mq_ops = { .queue_rq = rbd_queue_rq, }; @@ -7043,6 +7008,39 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) return ret; } +static int rbd_dev_refresh(struct rbd_device *rbd_dev) +{ + u64 mapping_size; + int ret; + + down_write(&rbd_dev->header_rwsem); + mapping_size = rbd_dev->mapping.size; + + ret = rbd_dev_header_info(rbd_dev); + if (ret) + goto out; + + /* + * If there is a parent, see if it has disappeared due to the + * mapped image getting flattened. + */ + if (rbd_dev->parent) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret) + goto out; + } + + rbd_assert(!rbd_is_snap(rbd_dev)); + rbd_dev->mapping.size = rbd_dev->header.image_size; + +out: + up_write(&rbd_dev->header_rwsem); + if (!ret && mapping_size != rbd_dev->mapping.size) + rbd_dev_update_size(rbd_dev); + + return ret; +} + static ssize_t do_rbd_add(const char *buf, size_t count) { struct rbd_device *rbd_dev = NULL;

2 years

1
0
0 0

FAILED: patch "[PATCH] rbd: move rbd_dev_refresh() definition" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 0b035401c570 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100402-gaffe-unsafe-73ee@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 0b035401c57021fc6c300272cbb1c5a889d4fe45 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov <idryomov(a)gmail.com> Date: Sun, 17 Sep 2023 15:07:40 +0200 Subject: [PATCH] rbd: move rbd_dev_refresh() definition Move rbd_dev_refresh() definition further down to avoid having to move struct parent_image_info definition in the next commit. This spares some forward declarations too. Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com> Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3de11f077144..5da001f1fe94 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -633,8 +633,6 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); -static int rbd_dev_header_info(struct rbd_device *rbd_dev); -static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -4931,39 +4929,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) } } -static int rbd_dev_refresh(struct rbd_device *rbd_dev) -{ - u64 mapping_size; - int ret; - - down_write(&rbd_dev->header_rwsem); - mapping_size = rbd_dev->mapping.size; - - ret = rbd_dev_header_info(rbd_dev); - if (ret) - goto out; - - /* - * If there is a parent, see if it has disappeared due to the - * mapped image getting flattened. - */ - if (rbd_dev->parent) { - ret = rbd_dev_v2_parent_info(rbd_dev); - if (ret) - goto out; - } - - rbd_assert(!rbd_is_snap(rbd_dev)); - rbd_dev->mapping.size = rbd_dev->header.image_size; - -out: - up_write(&rbd_dev->header_rwsem); - if (!ret && mapping_size != rbd_dev->mapping.size) - rbd_dev_update_size(rbd_dev); - - return ret; -} - static const struct blk_mq_ops rbd_mq_ops = { .queue_rq = rbd_queue_rq, }; @@ -7043,6 +7008,39 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) return ret; } +static int rbd_dev_refresh(struct rbd_device *rbd_dev) +{ + u64 mapping_size; + int ret; + + down_write(&rbd_dev->header_rwsem); + mapping_size = rbd_dev->mapping.size; + + ret = rbd_dev_header_info(rbd_dev); + if (ret) + goto out; + + /* + * If there is a parent, see if it has disappeared due to the + * mapped image getting flattened. + */ + if (rbd_dev->parent) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret) + goto out; + } + + rbd_assert(!rbd_is_snap(rbd_dev)); + rbd_dev->mapping.size = rbd_dev->header.image_size; + +out: + up_write(&rbd_dev->header_rwsem); + if (!ret && mapping_size != rbd_dev->mapping.size) + rbd_dev_update_size(rbd_dev); + + return ret; +} + static ssize_t do_rbd_add(const char *buf, size_t count) { struct rbd_device *rbd_dev = NULL;

2 years

1
0
0 0

FAILED: patch "[PATCH] rbd: move rbd_dev_refresh() definition" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 0b035401c570 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100401-brunch-clutch-5341@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 0b035401c57021fc6c300272cbb1c5a889d4fe45 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov <idryomov(a)gmail.com> Date: Sun, 17 Sep 2023 15:07:40 +0200 Subject: [PATCH] rbd: move rbd_dev_refresh() definition Move rbd_dev_refresh() definition further down to avoid having to move struct parent_image_info definition in the next commit. This spares some forward declarations too. Signed-off-by: Ilya Dryomov <idryomov(a)gmail.com> Reviewed-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3de11f077144..5da001f1fe94 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -633,8 +633,6 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); -static int rbd_dev_header_info(struct rbd_device *rbd_dev); -static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -4931,39 +4929,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) } } -static int rbd_dev_refresh(struct rbd_device *rbd_dev) -{ - u64 mapping_size; - int ret; - - down_write(&rbd_dev->header_rwsem); - mapping_size = rbd_dev->mapping.size; - - ret = rbd_dev_header_info(rbd_dev); - if (ret) - goto out; - - /* - * If there is a parent, see if it has disappeared due to the - * mapped image getting flattened. - */ - if (rbd_dev->parent) { - ret = rbd_dev_v2_parent_info(rbd_dev); - if (ret) - goto out; - } - - rbd_assert(!rbd_is_snap(rbd_dev)); - rbd_dev->mapping.size = rbd_dev->header.image_size; - -out: - up_write(&rbd_dev->header_rwsem); - if (!ret && mapping_size != rbd_dev->mapping.size) - rbd_dev_update_size(rbd_dev); - - return ret; -} - static const struct blk_mq_ops rbd_mq_ops = { .queue_rq = rbd_queue_rq, }; @@ -7043,6 +7008,39 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) return ret; } +static int rbd_dev_refresh(struct rbd_device *rbd_dev) +{ + u64 mapping_size; + int ret; + + down_write(&rbd_dev->header_rwsem); + mapping_size = rbd_dev->mapping.size; + + ret = rbd_dev_header_info(rbd_dev); + if (ret) + goto out; + + /* + * If there is a parent, see if it has disappeared due to the + * mapped image getting flattened. + */ + if (rbd_dev->parent) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret) + goto out; + } + + rbd_assert(!rbd_is_snap(rbd_dev)); + rbd_dev->mapping.size = rbd_dev->header.image_size; + +out: + up_write(&rbd_dev->header_rwsem); + if (!ret && mapping_size != rbd_dev->mapping.size) + rbd_dev_update_size(rbd_dev); + + return ret; +} + static ssize_t do_rbd_add(const char *buf, size_t count) { struct rbd_device *rbd_dev = NULL;

2 years

1
0
0 0

FAILED: patch "[PATCH] btrfs: file_remove_privs needs an exclusive lock in direct io" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 9af86694fd5d387992699ec99007ed374966ce9a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100456-previous-tarantula-2517@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 9af86694fd5d387992699ec99007ed374966ce9a Mon Sep 17 00:00:00 2001 From: Bernd Schubert <bschubert(a)ddn.com> Date: Wed, 6 Sep 2023 17:59:03 +0200 Subject: [PATCH] btrfs: file_remove_privs needs an exclusive lock in direct io write This was noticed by Miklos that file_remove_privs might call into notify_change(), which requires to hold an exclusive lock. The problem exists in FUSE and btrfs. We can fix it without any additional helpers from VFS, in case the privileges would need to be dropped, change the lock type to be exclusive and redo the loop. Fixes: e9adabb9712e ("btrfs: use shared lock for direct writes within EOF") CC: Miklos Szeredi <miklos(a)szeredi.hu> CC: stable(a)vger.kernel.org # 5.15+ Reviewed-by: Christoph Hellwig <hch(a)lst.de> Signed-off-by: Bernd Schubert <bschubert(a)ddn.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6edad7b9a5d3..e8726a83b649 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1466,8 +1466,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; - /* If the write DIO is within EOF, use a shared lock */ - if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode)) + /* + * If the write DIO is within EOF, use a shared lock and also only if + * security bits will likely not be dropped by file_remove_privs() called + * from btrfs_write_check(). Either will need to be rechecked after the + * lock was acquired. + */ + if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode)) ilock_flags |= BTRFS_ILOCK_SHARED; relock: @@ -1475,6 +1480,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (err < 0) return err; + /* Shared lock cannot be used with security bits set. */ + if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) { + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + ilock_flags &= ~BTRFS_ILOCK_SHARED; + goto relock; + } + err = generic_write_checks(iocb, from); if (err <= 0) { btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);

2 years

1
0
0 0

FAILED: patch "[PATCH] btrfs: file_remove_privs needs an exclusive lock in direct io" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 9af86694fd5d387992699ec99007ed374966ce9a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100400-appraisal-idealize-3243@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 9af86694fd5d387992699ec99007ed374966ce9a Mon Sep 17 00:00:00 2001 From: Bernd Schubert <bschubert(a)ddn.com> Date: Wed, 6 Sep 2023 17:59:03 +0200 Subject: [PATCH] btrfs: file_remove_privs needs an exclusive lock in direct io write This was noticed by Miklos that file_remove_privs might call into notify_change(), which requires to hold an exclusive lock. The problem exists in FUSE and btrfs. We can fix it without any additional helpers from VFS, in case the privileges would need to be dropped, change the lock type to be exclusive and redo the loop. Fixes: e9adabb9712e ("btrfs: use shared lock for direct writes within EOF") CC: Miklos Szeredi <miklos(a)szeredi.hu> CC: stable(a)vger.kernel.org # 5.15+ Reviewed-by: Christoph Hellwig <hch(a)lst.de> Signed-off-by: Bernd Schubert <bschubert(a)ddn.com> Reviewed-by: David Sterba <dsterba(a)suse.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6edad7b9a5d3..e8726a83b649 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1466,8 +1466,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_NOWAIT) ilock_flags |= BTRFS_ILOCK_TRY; - /* If the write DIO is within EOF, use a shared lock */ - if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode)) + /* + * If the write DIO is within EOF, use a shared lock and also only if + * security bits will likely not be dropped by file_remove_privs() called + * from btrfs_write_check(). Either will need to be rechecked after the + * lock was acquired. + */ + if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode)) ilock_flags |= BTRFS_ILOCK_SHARED; relock: @@ -1475,6 +1480,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (err < 0) return err; + /* Shared lock cannot be used with security bits set. */ + if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) { + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + ilock_flags &= ~BTRFS_ILOCK_SHARED; + goto relock; + } + err = generic_write_checks(iocb, from); if (err <= 0) { btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);

2 years

1
0
0 0

FAILED: patch "[PATCH] arm64: cpufeature: Fix CLRBHB and BC detection" failed to apply to 6.5-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.5-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y git checkout FETCH_HEAD git cherry-pick -x 479965a2b7ec481737df0cadf553331063b9c343 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100423-autistic-impaired-361f@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 479965a2b7ec481737df0cadf553331063b9c343 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko <kristina.martsenko(a)arm.com> Date: Tue, 12 Sep 2023 14:34:29 +0100 Subject: [PATCH] arm64: cpufeature: Fix CLRBHB and BC detection ClearBHB support is indicated by the CLRBHB field in ID_AA64ISAR2_EL1. Following some refactoring the kernel incorrectly checks the BC field instead. Fix the detection to use the right field. (Note: The original ClearBHB support had it as FTR_HIGHER_SAFE, but this patch uses FTR_LOWER_SAFE, which seems more correct.) Also fix the detection of BC (hinted conditional branches) to use FTR_LOWER_SAFE, so that it is not reported on mismatched systems. Fixes: 356137e68a9f ("arm64/sysreg: Make BHB clear feature defines match the architecture") Fixes: 8fcc8285c0e3 ("arm64/sysreg: Convert ID_AA64ISAR2_EL1 to automatic generation") Cc: stable(a)vger.kernel.org Signed-off-by: Kristina Martsenko <kristina.martsenko(a)arm.com> Reviewed-by: Mark Brown <broonie(a)kernel.org> Link: https://lore.kernel.org/r/20230912133429.2606875-1-kristina.martsenko@arm.c… Signed-off-by: Will Deacon <will(a)kernel.org> diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 96e50227f940..5bba39376055 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -663,7 +663,7 @@ static inline bool supports_clearbhb(int scope) isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); return cpuid_feature_extract_unsigned_field(isar2, - ID_AA64ISAR2_EL1_BC_SHIFT); + ID_AA64ISAR2_EL1_CLRBHB_SHIFT); } const struct cpumask *system_32bit_el0_cpumask(void); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b018ae12ff5f..444a73c2e638 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -222,7 +222,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 2517ef7c21cf..76ce150e7347 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -1347,7 +1347,11 @@ UnsignedEnum 51:48 RPRFM 0b0000 NI 0b0001 IMP EndEnum -Res0 47:28 +Res0 47:32 +UnsignedEnum 31:28 CLRBHB + 0b0000 NI + 0b0001 IMP +EndEnum UnsignedEnum 27:24 PAC_frac 0b0000 NI 0b0001 IMP

2 years

1
0
0 0

FAILED: patch "[PATCH] drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x b7599d241778d0b10cdf7a5c755aa7db9b83250c # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100431-penalty-pegboard-c849@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From b7599d241778d0b10cdf7a5c755aa7db9b83250c Mon Sep 17 00:00:00 2001 From: Javier Pello <devel(a)otheo.eu> Date: Sat, 2 Sep 2023 17:10:39 +0200 Subject: [PATCH] drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top There is an assertion in ggtt_reserve_guc_top that the global GTT is of size at least GUC_GGTT_TOP, which is not the case on a 32-bit platform; see commit 562d55d991b39ce376c492df2f7890fd6a541ffc ("drm/i915/bdw: Only use 2g GGTT for 32b platforms"). If GEM_BUG_ON is enabled, this triggers a BUG(); if GEM_BUG_ON is disabled, the subsequent reservation fails and the driver fails to initialise the device: i915 0000:00:02.0: [drm:i915_init_ggtt [i915]] Failed to reserve top of GGTT for GuC i915 0000:00:02.0: Device initialization failed (-28) i915 0000:00:02.0: Please file a bug on drm/i915; see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details. i915: probe of 0000:00:02.0 failed with error -28 Make the reservation at the top of the available space, whatever that is, instead of assuming that the top will be GUC_GGTT_TOP. Fixes: 911800765ef6 ("drm/i915/uc: Reserve upper range of GGTT") Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9080 Signed-off-by: Javier Pello <devel(a)otheo.eu> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio(a)intel.com> Cc: Fernando Pacheco <fernando.pacheco(a)intel.com> Cc: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Jani Nikula <jani.nikula(a)linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com> Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com> Cc: intel-gfx(a)lists.freedesktop.org Cc: stable(a)vger.kernel.org # v5.3+ Signed-off-by: John Harrison <John.C.Harrison(a)Intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230902171039.2229126186d697… (cherry picked from commit 0f3fa942d91165c2702577e9274d2ee1c7212afc) Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index dd0ed941441a..da21f2786b5d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, vm->clear_range(vm, vma_res->start, vma_res->vma_size); } +/* + * Reserve the top of the GuC address space for firmware images. Addresses + * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC, + * which makes for a suitable range to hold GuC/HuC firmware images if the + * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT + * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk + * of the same size anyway, which is far more than needed, to keep the logic + * in uc_fw_ggtt_offset() simple. + */ +#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP) + static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) { - u64 size; + u64 offset; int ret; if (!intel_uc_uses_guc(&ggtt->vm.gt->uc)) return 0; - GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); - size = ggtt->vm.total - GUC_GGTT_TOP; + GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE); + offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE; - ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size, - GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, - PIN_NOEVICT); + ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, + GUC_TOP_RESERVE_SIZE, offset, + I915_COLOR_UNEVICTABLE, PIN_NOEVICT); if (ret) drm_dbg(&ggtt->vm.i915->drm, "Failed to reserve top of GGTT for GuC\n");

2 years

1
0
0 0

FAILED: patch "[PATCH] drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x b7599d241778d0b10cdf7a5c755aa7db9b83250c # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100427-familiar-tabasco-df57@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From b7599d241778d0b10cdf7a5c755aa7db9b83250c Mon Sep 17 00:00:00 2001 From: Javier Pello <devel(a)otheo.eu> Date: Sat, 2 Sep 2023 17:10:39 +0200 Subject: [PATCH] drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top There is an assertion in ggtt_reserve_guc_top that the global GTT is of size at least GUC_GGTT_TOP, which is not the case on a 32-bit platform; see commit 562d55d991b39ce376c492df2f7890fd6a541ffc ("drm/i915/bdw: Only use 2g GGTT for 32b platforms"). If GEM_BUG_ON is enabled, this triggers a BUG(); if GEM_BUG_ON is disabled, the subsequent reservation fails and the driver fails to initialise the device: i915 0000:00:02.0: [drm:i915_init_ggtt [i915]] Failed to reserve top of GGTT for GuC i915 0000:00:02.0: Device initialization failed (-28) i915 0000:00:02.0: Please file a bug on drm/i915; see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details. i915: probe of 0000:00:02.0 failed with error -28 Make the reservation at the top of the available space, whatever that is, instead of assuming that the top will be GUC_GGTT_TOP. Fixes: 911800765ef6 ("drm/i915/uc: Reserve upper range of GGTT") Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9080 Signed-off-by: Javier Pello <devel(a)otheo.eu> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio(a)intel.com> Cc: Fernando Pacheco <fernando.pacheco(a)intel.com> Cc: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Jani Nikula <jani.nikula(a)linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com> Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com> Cc: intel-gfx(a)lists.freedesktop.org Cc: stable(a)vger.kernel.org # v5.3+ Signed-off-by: John Harrison <John.C.Harrison(a)Intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230902171039.2229126186d697… (cherry picked from commit 0f3fa942d91165c2702577e9274d2ee1c7212afc) Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index dd0ed941441a..da21f2786b5d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, vm->clear_range(vm, vma_res->start, vma_res->vma_size); } +/* + * Reserve the top of the GuC address space for firmware images. Addresses + * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC, + * which makes for a suitable range to hold GuC/HuC firmware images if the + * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT + * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk + * of the same size anyway, which is far more than needed, to keep the logic + * in uc_fw_ggtt_offset() simple. + */ +#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP) + static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) { - u64 size; + u64 offset; int ret; if (!intel_uc_uses_guc(&ggtt->vm.gt->uc)) return 0; - GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); - size = ggtt->vm.total - GUC_GGTT_TOP; + GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE); + offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE; - ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size, - GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, - PIN_NOEVICT); + ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, + GUC_TOP_RESERVE_SIZE, offset, + I915_COLOR_UNEVICTABLE, PIN_NOEVICT); if (ret) drm_dbg(&ggtt->vm.i915->drm, "Failed to reserve top of GGTT for GuC\n");

2 years

1
0
0 0

FAILED: patch "[PATCH] drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x b7599d241778d0b10cdf7a5c755aa7db9b83250c # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100426-vocally-babbling-a2bc@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From b7599d241778d0b10cdf7a5c755aa7db9b83250c Mon Sep 17 00:00:00 2001 From: Javier Pello <devel(a)otheo.eu> Date: Sat, 2 Sep 2023 17:10:39 +0200 Subject: [PATCH] drm/i915/gt: Fix reservation address in ggtt_reserve_guc_top There is an assertion in ggtt_reserve_guc_top that the global GTT is of size at least GUC_GGTT_TOP, which is not the case on a 32-bit platform; see commit 562d55d991b39ce376c492df2f7890fd6a541ffc ("drm/i915/bdw: Only use 2g GGTT for 32b platforms"). If GEM_BUG_ON is enabled, this triggers a BUG(); if GEM_BUG_ON is disabled, the subsequent reservation fails and the driver fails to initialise the device: i915 0000:00:02.0: [drm:i915_init_ggtt [i915]] Failed to reserve top of GGTT for GuC i915 0000:00:02.0: Device initialization failed (-28) i915 0000:00:02.0: Please file a bug on drm/i915; see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details. i915: probe of 0000:00:02.0 failed with error -28 Make the reservation at the top of the available space, whatever that is, instead of assuming that the top will be GUC_GGTT_TOP. Fixes: 911800765ef6 ("drm/i915/uc: Reserve upper range of GGTT") Link: https://gitlab.freedesktop.org/drm/intel/-/issues/9080 Signed-off-by: Javier Pello <devel(a)otheo.eu> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio(a)intel.com> Cc: Fernando Pacheco <fernando.pacheco(a)intel.com> Cc: Chris Wilson <chris(a)chris-wilson.co.uk> Cc: Jani Nikula <jani.nikula(a)linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com> Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com> Cc: intel-gfx(a)lists.freedesktop.org Cc: stable(a)vger.kernel.org # v5.3+ Signed-off-by: John Harrison <John.C.Harrison(a)Intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20230902171039.2229126186d697… (cherry picked from commit 0f3fa942d91165c2702577e9274d2ee1c7212afc) Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index dd0ed941441a..da21f2786b5d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -511,20 +511,31 @@ void intel_ggtt_unbind_vma(struct i915_address_space *vm, vm->clear_range(vm, vma_res->start, vma_res->vma_size); } +/* + * Reserve the top of the GuC address space for firmware images. Addresses + * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC, + * which makes for a suitable range to hold GuC/HuC firmware images if the + * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT + * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk + * of the same size anyway, which is far more than needed, to keep the logic + * in uc_fw_ggtt_offset() simple. + */ +#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP) + static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) { - u64 size; + u64 offset; int ret; if (!intel_uc_uses_guc(&ggtt->vm.gt->uc)) return 0; - GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP); - size = ggtt->vm.total - GUC_GGTT_TOP; + GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE); + offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE; - ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size, - GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE, - PIN_NOEVICT); + ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, + GUC_TOP_RESERVE_SIZE, offset, + I915_COLOR_UNEVICTABLE, PIN_NOEVICT); if (ret) drm_dbg(&ggtt->vm.i915->drm, "Failed to reserve top of GGTT for GuC\n");

2 years

1
0
0 0

FAILED: patch "[PATCH] ata: libata-core: Do not register PM operations for SAS ports" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y git checkout FETCH_HEAD git cherry-pick -x 75e2bd5f1ede42a2bc88aa34b431e1ace8e0bea0 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100441-finalist-fox-8bb6@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 75e2bd5f1ede42a2bc88aa34b431e1ace8e0bea0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal <dlemoal(a)kernel.org> Date: Fri, 8 Sep 2023 20:04:52 +0900 Subject: [PATCH] ata: libata-core: Do not register PM operations for SAS ports libsas does its own domain based power management of ports. For such ports, libata should not use a device type defining power management operations as executing these operations for suspend/resume in addition to libsas calls to ata_sas_port_suspend() and ata_sas_port_resume() is not necessary (and likely dangerous to do, even though problems are not seen currently). Introduce the new ata_port_sas_type device_type for ports managed by libsas. This new device type is used in ata_tport_add() and is defined without power management operations. Fixes: 2fcbdcb4c802 ("[SCSI] libata: export ata_port suspend/resume infrastructure for sas") Cc: stable(a)vger.kernel.org Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org> Reviewed-by: Hannes Reinecke <hare(a)suse.de> Tested-by: Chia-Lin Kao (AceLan) <acelan.kao(a)canonical.com> Tested-by: Geert Uytterhoeven <geert+renesas(a)glider.be> Reviewed-by: John Garry <john.g.garry(a)oracle.com> Reviewed-by: Martin K. Petersen <martin.petersen(a)oracle.com> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 092372334e92..261445c1851b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5335,7 +5335,7 @@ EXPORT_SYMBOL_GPL(ata_host_resume); #endif const struct device_type ata_port_type = { - .name = "ata_port", + .name = ATA_PORT_TYPE_NAME, #ifdef CONFIG_PM .pm = &ata_port_pm_ops, #endif diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index e4fb9d1b9b39..3e49a877500e 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -266,6 +266,10 @@ void ata_tport_delete(struct ata_port *ap) put_device(dev); } +static const struct device_type ata_port_sas_type = { + .name = ATA_PORT_TYPE_NAME, +}; + /** ata_tport_add - initialize a transport ATA port structure * * @parent: parent device @@ -283,7 +287,10 @@ int ata_tport_add(struct device *parent, struct device *dev = &ap->tdev; device_initialize(dev); - dev->type = &ata_port_type; + if (ap->flags & ATA_FLAG_SAS_HOST) + dev->type = &ata_port_sas_type; + else + dev->type = &ata_port_type; dev->parent = parent; ata_host_get(ap->host); diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 820299bd9d06..05ac80da8ebc 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -30,6 +30,8 @@ enum { ATA_DNXFER_QUIET = (1 << 31), }; +#define ATA_PORT_TYPE_NAME "ata_port" + extern atomic_t ata_print_id; extern int atapi_passthru16; extern int libata_fua;

2 years

1
0
0 0

FAILED: patch "[PATCH] btrfs: don't clear uptodate on write errors" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x b595d25996329427b2c09d4b90395a165fb3ef8e # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100402-reprint-snugness-793f@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From b595d25996329427b2c09d4b90395a165fb3ef8e Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef(a)toxicpanda.com> Date: Fri, 8 Sep 2023 15:31:39 -0400 Subject: [PATCH] btrfs: don't clear uptodate on write errors We have been consistently seeing hangs with generic/648 in our subpage GitHub CI setup. This is a classic deadlock, we are calling btrfs_read_folio() on a folio, which requires holding the folio lock on the folio, and then finding a ordered extent that overlaps that range and calling btrfs_start_ordered_extent(), which then tries to write out the dirty page, which requires taking the folio lock and then we deadlock. The hang happens because we're writing to range [1271750656, 1271767040), page index [77621, 77622], and page 77621 is !Uptodate. It is also Dirty, so we call btrfs_read_folio() for 77621 and which does btrfs_lock_and_flush_ordered_range() for that range, and we find an ordered extent which is [1271644160, 1271746560), page index [77615, 77621]. The page indexes overlap, but the actual bytes don't overlap. We're holding the page lock for 77621, then call btrfs_lock_and_flush_ordered_range() which tries to flush the dirty page, and tries to lock 77621 again and then we deadlock. The byte ranges do not overlap, but with subpage support if we clear uptodate on any portion of the page we mark the entire thing as not uptodate. We have been clearing page uptodate on write errors, but no other file system does this, and is in fact incorrect. This doesn't hurt us in the !subpage case because we can't end up with overlapped ranges that don't also overlap on the page. Fix this by not clearing uptodate when we have a write error. The only thing we should be doing in this case is setting the mapping error and carrying on. This makes it so we would no longer call btrfs_read_folio() on the page as it's uptodate and eliminates the deadlock. With this patch we're now able to make it through a full fstests run on our subpage blocksize VMs. Note for stable backports: this probably goes beyond 6.1 but the code has been cleaned up and clearing the uptodate bit must be verified on each version independently. CC: stable(a)vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo <wqu(a)suse.com> Signed-off-by: Josef Bacik <josef(a)toxicpanda.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ac3fca5a5e41..6954ae763b86 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -484,10 +484,8 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio) bvec->bv_offset, bvec->bv_len); btrfs_finish_ordered_extent(bbio->ordered, page, start, len, !error); - if (error) { - btrfs_page_clear_uptodate(fs_info, page, start, len); + if (error) mapping_set_error(page->mapping, error); - } btrfs_page_clear_writeback(fs_info, page, start, len); } @@ -1456,8 +1454,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl if (ret) { btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(btrfs_sb(inode->i_sb), page, - page_start, PAGE_SIZE); mapping_set_error(page->mapping, ret); } unlock_page(page); @@ -1624,8 +1620,6 @@ static void extent_buffer_write_end_io(struct btrfs_bio *bbio) struct page *page = bvec->bv_page; u32 len = bvec->bv_len; - if (!uptodate) - btrfs_page_clear_uptodate(fs_info, page, start, len); btrfs_page_clear_writeback(fs_info, page, start, len); bio_offset += len; } @@ -2201,7 +2195,6 @@ void extent_write_locked_range(struct inode *inode, struct page *locked_page, if (ret) { btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, cur, cur_len, !ret); - btrfs_page_clear_uptodate(fs_info, page, cur, cur_len); mapping_set_error(page->mapping, ret); } btrfs_page_unlock_writer(fs_info, page, cur, cur_len); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e211e88c6545..616fdcf40467 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1085,9 +1085,6 @@ static void submit_uncompressed_range(struct btrfs_inode *inode, btrfs_mark_ordered_io_finished(inode, locked_page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(inode->root->fs_info, - locked_page, page_start, - PAGE_SIZE); mapping_set_error(locked_page->mapping, ret); unlock_page(locked_page); } @@ -2791,7 +2788,6 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) mapping_set_error(page->mapping, ret); btrfs_mark_ordered_io_finished(inode, page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(fs_info, page, page_start, PAGE_SIZE); clear_page_dirty_for_io(page); } btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE);

2 years

1
0
0 0

FAILED: patch "[PATCH] btrfs: don't clear uptodate on write errors" failed to apply to 6.5-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.5-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y git checkout FETCH_HEAD git cherry-pick -x b595d25996329427b2c09d4b90395a165fb3ef8e # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100400-garbage-caboose-c859@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From b595d25996329427b2c09d4b90395a165fb3ef8e Mon Sep 17 00:00:00 2001 From: Josef Bacik <josef(a)toxicpanda.com> Date: Fri, 8 Sep 2023 15:31:39 -0400 Subject: [PATCH] btrfs: don't clear uptodate on write errors We have been consistently seeing hangs with generic/648 in our subpage GitHub CI setup. This is a classic deadlock, we are calling btrfs_read_folio() on a folio, which requires holding the folio lock on the folio, and then finding a ordered extent that overlaps that range and calling btrfs_start_ordered_extent(), which then tries to write out the dirty page, which requires taking the folio lock and then we deadlock. The hang happens because we're writing to range [1271750656, 1271767040), page index [77621, 77622], and page 77621 is !Uptodate. It is also Dirty, so we call btrfs_read_folio() for 77621 and which does btrfs_lock_and_flush_ordered_range() for that range, and we find an ordered extent which is [1271644160, 1271746560), page index [77615, 77621]. The page indexes overlap, but the actual bytes don't overlap. We're holding the page lock for 77621, then call btrfs_lock_and_flush_ordered_range() which tries to flush the dirty page, and tries to lock 77621 again and then we deadlock. The byte ranges do not overlap, but with subpage support if we clear uptodate on any portion of the page we mark the entire thing as not uptodate. We have been clearing page uptodate on write errors, but no other file system does this, and is in fact incorrect. This doesn't hurt us in the !subpage case because we can't end up with overlapped ranges that don't also overlap on the page. Fix this by not clearing uptodate when we have a write error. The only thing we should be doing in this case is setting the mapping error and carrying on. This makes it so we would no longer call btrfs_read_folio() on the page as it's uptodate and eliminates the deadlock. With this patch we're now able to make it through a full fstests run on our subpage blocksize VMs. Note for stable backports: this probably goes beyond 6.1 but the code has been cleaned up and clearing the uptodate bit must be verified on each version independently. CC: stable(a)vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo <wqu(a)suse.com> Signed-off-by: Josef Bacik <josef(a)toxicpanda.com> Signed-off-by: David Sterba <dsterba(a)suse.com> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ac3fca5a5e41..6954ae763b86 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -484,10 +484,8 @@ static void end_bio_extent_writepage(struct btrfs_bio *bbio) bvec->bv_offset, bvec->bv_len); btrfs_finish_ordered_extent(bbio->ordered, page, start, len, !error); - if (error) { - btrfs_page_clear_uptodate(fs_info, page, start, len); + if (error) mapping_set_error(page->mapping, error); - } btrfs_page_clear_writeback(fs_info, page, start, len); } @@ -1456,8 +1454,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl if (ret) { btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(btrfs_sb(inode->i_sb), page, - page_start, PAGE_SIZE); mapping_set_error(page->mapping, ret); } unlock_page(page); @@ -1624,8 +1620,6 @@ static void extent_buffer_write_end_io(struct btrfs_bio *bbio) struct page *page = bvec->bv_page; u32 len = bvec->bv_len; - if (!uptodate) - btrfs_page_clear_uptodate(fs_info, page, start, len); btrfs_page_clear_writeback(fs_info, page, start, len); bio_offset += len; } @@ -2201,7 +2195,6 @@ void extent_write_locked_range(struct inode *inode, struct page *locked_page, if (ret) { btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, cur, cur_len, !ret); - btrfs_page_clear_uptodate(fs_info, page, cur, cur_len); mapping_set_error(page->mapping, ret); } btrfs_page_unlock_writer(fs_info, page, cur, cur_len); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e211e88c6545..616fdcf40467 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1085,9 +1085,6 @@ static void submit_uncompressed_range(struct btrfs_inode *inode, btrfs_mark_ordered_io_finished(inode, locked_page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(inode->root->fs_info, - locked_page, page_start, - PAGE_SIZE); mapping_set_error(locked_page->mapping, ret); unlock_page(locked_page); } @@ -2791,7 +2788,6 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) mapping_set_error(page->mapping, ret); btrfs_mark_ordered_io_finished(inode, page, page_start, PAGE_SIZE, !ret); - btrfs_page_clear_uptodate(fs_info, page, page_start, PAGE_SIZE); clear_page_dirty_for_io(page); } btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE);

2 years

1
0
0 0

FAILED: patch "[PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y git checkout FETCH_HEAD git cherry-pick -x 45d99ea451d0c30bfd4864f0fe485d7dac014902 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100440-armory-unbalance-bd33@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 45d99ea451d0c30bfd4864f0fe485d7dac014902 Mon Sep 17 00:00:00 2001 From: Zheng Yejian <zhengyejian1(a)huawei.com> Date: Thu, 21 Sep 2023 20:54:25 +0800 Subject: [PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats The 'bytes' info in file 'per_cpu/cpu<X>/stats' means the number of bytes in cpu buffer that have not been consumed. However, currently after consuming data by reading file 'trace_pipe', the 'bytes' info was not changed as expected. # cat per_cpu/cpu0/stats entries: 0 overrun: 0 commit overrun: 0 bytes: 568 <--- 'bytes' is problematical !!! oldest event ts: 8651.371479 now ts: 8653.912224 dropped events: 0 read events: 8 The root cause is incorrect stat on cpu_buffer->read_bytes. To fix it: 1. When stat 'read_bytes', account consumed event in rb_advance_reader(); 2. When stat 'entries_bytes', exclude the discarded padding event which is smaller than minimum size because it is invisible to reader. Then use rb_page_commit() instead of BUF_PAGE_SIZE at where accounting for page-based read/remove/overrun. Also correct the comments of ring_buffer_bytes_cpu() in this patch. Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengye… Cc: stable(a)vger.kernel.org Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events") Signed-off-by: Zheng Yejian <zhengyejian1(a)huawei.com> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1651edc48d5..28daf0ce95c5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } +static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) +{ + return local_read(&bpage->page->commit); +} + static void free_buffer_page(struct buffer_page *bpage) { free_page((unsigned long)bpage->page); @@ -2003,7 +2008,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } @@ -2367,11 +2372,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) -{ - return local_read(&bpage->page->commit); -} - static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { @@ -2517,7 +2517,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the counters. */ local_add(entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); /* @@ -2660,9 +2660,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); - /* account for padding bytes */ - local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); - /* * Save the original length to the meta data. * This will be used by the reader to add lost event @@ -2676,7 +2673,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure - * that this space is not used again. + * that this space is not used again, and this space will + * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. @@ -2701,6 +2699,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* account for padding bytes */ + local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); @@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); /** - * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer + * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ @@ -4723,6 +4724,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) length = rb_event_length(event); cpu_buffer->reader_page->read += length; + cpu_buffer->read_bytes += length; } static void rb_advance_iter(struct ring_buffer_iter *iter) @@ -5816,7 +5818,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); - cpu_buffer->read_bytes += BUF_PAGE_SIZE; + cpu_buffer->read_bytes += rb_page_commit(reader); /* swap the pages */ rb_init_page(bpage);

2 years

1
0
0 0

FAILED: patch "[PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 45d99ea451d0c30bfd4864f0fe485d7dac014902 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100438-palpable-marina-68c8@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 45d99ea451d0c30bfd4864f0fe485d7dac014902 Mon Sep 17 00:00:00 2001 From: Zheng Yejian <zhengyejian1(a)huawei.com> Date: Thu, 21 Sep 2023 20:54:25 +0800 Subject: [PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats The 'bytes' info in file 'per_cpu/cpu<X>/stats' means the number of bytes in cpu buffer that have not been consumed. However, currently after consuming data by reading file 'trace_pipe', the 'bytes' info was not changed as expected. # cat per_cpu/cpu0/stats entries: 0 overrun: 0 commit overrun: 0 bytes: 568 <--- 'bytes' is problematical !!! oldest event ts: 8651.371479 now ts: 8653.912224 dropped events: 0 read events: 8 The root cause is incorrect stat on cpu_buffer->read_bytes. To fix it: 1. When stat 'read_bytes', account consumed event in rb_advance_reader(); 2. When stat 'entries_bytes', exclude the discarded padding event which is smaller than minimum size because it is invisible to reader. Then use rb_page_commit() instead of BUF_PAGE_SIZE at where accounting for page-based read/remove/overrun. Also correct the comments of ring_buffer_bytes_cpu() in this patch. Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengye… Cc: stable(a)vger.kernel.org Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events") Signed-off-by: Zheng Yejian <zhengyejian1(a)huawei.com> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1651edc48d5..28daf0ce95c5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } +static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) +{ + return local_read(&bpage->page->commit); +} + static void free_buffer_page(struct buffer_page *bpage) { free_page((unsigned long)bpage->page); @@ -2003,7 +2008,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } @@ -2367,11 +2372,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) -{ - return local_read(&bpage->page->commit); -} - static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { @@ -2517,7 +2517,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the counters. */ local_add(entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); /* @@ -2660,9 +2660,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); - /* account for padding bytes */ - local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); - /* * Save the original length to the meta data. * This will be used by the reader to add lost event @@ -2676,7 +2673,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure - * that this space is not used again. + * that this space is not used again, and this space will + * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. @@ -2701,6 +2699,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* account for padding bytes */ + local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); @@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); /** - * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer + * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ @@ -4723,6 +4724,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) length = rb_event_length(event); cpu_buffer->reader_page->read += length; + cpu_buffer->read_bytes += length; } static void rb_advance_iter(struct ring_buffer_iter *iter) @@ -5816,7 +5818,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); - cpu_buffer->read_bytes += BUF_PAGE_SIZE; + cpu_buffer->read_bytes += rb_page_commit(reader); /* swap the pages */ rb_init_page(bpage);

2 years

1
0
0 0

FAILED: patch "[PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 45d99ea451d0c30bfd4864f0fe485d7dac014902 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100437-backyard-cartwheel-4b2f@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 45d99ea451d0c30bfd4864f0fe485d7dac014902 Mon Sep 17 00:00:00 2001 From: Zheng Yejian <zhengyejian1(a)huawei.com> Date: Thu, 21 Sep 2023 20:54:25 +0800 Subject: [PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats The 'bytes' info in file 'per_cpu/cpu<X>/stats' means the number of bytes in cpu buffer that have not been consumed. However, currently after consuming data by reading file 'trace_pipe', the 'bytes' info was not changed as expected. # cat per_cpu/cpu0/stats entries: 0 overrun: 0 commit overrun: 0 bytes: 568 <--- 'bytes' is problematical !!! oldest event ts: 8651.371479 now ts: 8653.912224 dropped events: 0 read events: 8 The root cause is incorrect stat on cpu_buffer->read_bytes. To fix it: 1. When stat 'read_bytes', account consumed event in rb_advance_reader(); 2. When stat 'entries_bytes', exclude the discarded padding event which is smaller than minimum size because it is invisible to reader. Then use rb_page_commit() instead of BUF_PAGE_SIZE at where accounting for page-based read/remove/overrun. Also correct the comments of ring_buffer_bytes_cpu() in this patch. Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengye… Cc: stable(a)vger.kernel.org Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events") Signed-off-by: Zheng Yejian <zhengyejian1(a)huawei.com> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1651edc48d5..28daf0ce95c5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } +static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) +{ + return local_read(&bpage->page->commit); +} + static void free_buffer_page(struct buffer_page *bpage) { free_page((unsigned long)bpage->page); @@ -2003,7 +2008,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } @@ -2367,11 +2372,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) -{ - return local_read(&bpage->page->commit); -} - static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { @@ -2517,7 +2517,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the counters. */ local_add(entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); /* @@ -2660,9 +2660,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); - /* account for padding bytes */ - local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); - /* * Save the original length to the meta data. * This will be used by the reader to add lost event @@ -2676,7 +2673,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure - * that this space is not used again. + * that this space is not used again, and this space will + * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. @@ -2701,6 +2699,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* account for padding bytes */ + local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); @@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); /** - * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer + * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ @@ -4723,6 +4724,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) length = rb_event_length(event); cpu_buffer->reader_page->read += length; + cpu_buffer->read_bytes += length; } static void rb_advance_iter(struct ring_buffer_iter *iter) @@ -5816,7 +5818,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); - cpu_buffer->read_bytes += BUF_PAGE_SIZE; + cpu_buffer->read_bytes += rb_page_commit(reader); /* swap the pages */ rb_init_page(bpage);

2 years

1
0
0 0

FAILED: patch "[PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 45d99ea451d0c30bfd4864f0fe485d7dac014902 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100436-ergonomic-enviably-bf14@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 45d99ea451d0c30bfd4864f0fe485d7dac014902 Mon Sep 17 00:00:00 2001 From: Zheng Yejian <zhengyejian1(a)huawei.com> Date: Thu, 21 Sep 2023 20:54:25 +0800 Subject: [PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats The 'bytes' info in file 'per_cpu/cpu<X>/stats' means the number of bytes in cpu buffer that have not been consumed. However, currently after consuming data by reading file 'trace_pipe', the 'bytes' info was not changed as expected. # cat per_cpu/cpu0/stats entries: 0 overrun: 0 commit overrun: 0 bytes: 568 <--- 'bytes' is problematical !!! oldest event ts: 8651.371479 now ts: 8653.912224 dropped events: 0 read events: 8 The root cause is incorrect stat on cpu_buffer->read_bytes. To fix it: 1. When stat 'read_bytes', account consumed event in rb_advance_reader(); 2. When stat 'entries_bytes', exclude the discarded padding event which is smaller than minimum size because it is invisible to reader. Then use rb_page_commit() instead of BUF_PAGE_SIZE at where accounting for page-based read/remove/overrun. Also correct the comments of ring_buffer_bytes_cpu() in this patch. Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengye… Cc: stable(a)vger.kernel.org Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events") Signed-off-by: Zheng Yejian <zhengyejian1(a)huawei.com> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1651edc48d5..28daf0ce95c5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } +static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) +{ + return local_read(&bpage->page->commit); +} + static void free_buffer_page(struct buffer_page *bpage) { free_page((unsigned long)bpage->page); @@ -2003,7 +2008,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } @@ -2367,11 +2372,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) -{ - return local_read(&bpage->page->commit); -} - static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { @@ -2517,7 +2517,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the counters. */ local_add(entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); /* @@ -2660,9 +2660,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); - /* account for padding bytes */ - local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); - /* * Save the original length to the meta data. * This will be used by the reader to add lost event @@ -2676,7 +2673,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure - * that this space is not used again. + * that this space is not used again, and this space will + * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. @@ -2701,6 +2699,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* account for padding bytes */ + local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); @@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); /** - * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer + * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ @@ -4723,6 +4724,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) length = rb_event_length(event); cpu_buffer->reader_page->read += length; + cpu_buffer->read_bytes += length; } static void rb_advance_iter(struct ring_buffer_iter *iter) @@ -5816,7 +5818,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); - cpu_buffer->read_bytes += BUF_PAGE_SIZE; + cpu_buffer->read_bytes += rb_page_commit(reader); /* swap the pages */ rb_init_page(bpage);

2 years

1
0
0 0

FAILED: patch "[PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 45d99ea451d0c30bfd4864f0fe485d7dac014902 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100434-majestic-daisy-4a02@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 45d99ea451d0c30bfd4864f0fe485d7dac014902 Mon Sep 17 00:00:00 2001 From: Zheng Yejian <zhengyejian1(a)huawei.com> Date: Thu, 21 Sep 2023 20:54:25 +0800 Subject: [PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats The 'bytes' info in file 'per_cpu/cpu<X>/stats' means the number of bytes in cpu buffer that have not been consumed. However, currently after consuming data by reading file 'trace_pipe', the 'bytes' info was not changed as expected. # cat per_cpu/cpu0/stats entries: 0 overrun: 0 commit overrun: 0 bytes: 568 <--- 'bytes' is problematical !!! oldest event ts: 8651.371479 now ts: 8653.912224 dropped events: 0 read events: 8 The root cause is incorrect stat on cpu_buffer->read_bytes. To fix it: 1. When stat 'read_bytes', account consumed event in rb_advance_reader(); 2. When stat 'entries_bytes', exclude the discarded padding event which is smaller than minimum size because it is invisible to reader. Then use rb_page_commit() instead of BUF_PAGE_SIZE at where accounting for page-based read/remove/overrun. Also correct the comments of ring_buffer_bytes_cpu() in this patch. Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengye… Cc: stable(a)vger.kernel.org Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events") Signed-off-by: Zheng Yejian <zhengyejian1(a)huawei.com> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1651edc48d5..28daf0ce95c5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } +static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) +{ + return local_read(&bpage->page->commit); +} + static void free_buffer_page(struct buffer_page *bpage) { free_page((unsigned long)bpage->page); @@ -2003,7 +2008,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } @@ -2367,11 +2372,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) -{ - return local_read(&bpage->page->commit); -} - static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { @@ -2517,7 +2517,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the counters. */ local_add(entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); /* @@ -2660,9 +2660,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); - /* account for padding bytes */ - local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); - /* * Save the original length to the meta data. * This will be used by the reader to add lost event @@ -2676,7 +2673,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure - * that this space is not used again. + * that this space is not used again, and this space will + * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. @@ -2701,6 +2699,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* account for padding bytes */ + local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); @@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); /** - * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer + * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ @@ -4723,6 +4724,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) length = rb_event_length(event); cpu_buffer->reader_page->read += length; + cpu_buffer->read_bytes += length; } static void rb_advance_iter(struct ring_buffer_iter *iter) @@ -5816,7 +5818,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); - cpu_buffer->read_bytes += BUF_PAGE_SIZE; + cpu_buffer->read_bytes += rb_page_commit(reader); /* swap the pages */ rb_init_page(bpage);

2 years

1
0
0 0

FAILED: patch "[PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 45d99ea451d0c30bfd4864f0fe485d7dac014902 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100433-pumice-despise-a596@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 45d99ea451d0c30bfd4864f0fe485d7dac014902 Mon Sep 17 00:00:00 2001 From: Zheng Yejian <zhengyejian1(a)huawei.com> Date: Thu, 21 Sep 2023 20:54:25 +0800 Subject: [PATCH] ring-buffer: Fix bytes info in per_cpu buffer stats The 'bytes' info in file 'per_cpu/cpu<X>/stats' means the number of bytes in cpu buffer that have not been consumed. However, currently after consuming data by reading file 'trace_pipe', the 'bytes' info was not changed as expected. # cat per_cpu/cpu0/stats entries: 0 overrun: 0 commit overrun: 0 bytes: 568 <--- 'bytes' is problematical !!! oldest event ts: 8651.371479 now ts: 8653.912224 dropped events: 0 read events: 8 The root cause is incorrect stat on cpu_buffer->read_bytes. To fix it: 1. When stat 'read_bytes', account consumed event in rb_advance_reader(); 2. When stat 'entries_bytes', exclude the discarded padding event which is smaller than minimum size because it is invisible to reader. Then use rb_page_commit() instead of BUF_PAGE_SIZE at where accounting for page-based read/remove/overrun. Also correct the comments of ring_buffer_bytes_cpu() in this patch. Link: https://lore.kernel.org/linux-trace-kernel/20230921125425.1708423-1-zhengye… Cc: stable(a)vger.kernel.org Fixes: c64e148a3be3 ("trace: Add ring buffer stats to measure rate of events") Signed-off-by: Zheng Yejian <zhengyejian1(a)huawei.com> Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a1651edc48d5..28daf0ce95c5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -354,6 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage) local_set(&bpage->commit, 0); } +static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage) +{ + return local_read(&bpage->page->commit); +} + static void free_buffer_page(struct buffer_page *bpage) { free_page((unsigned long)bpage->page); @@ -2003,7 +2008,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) * Increment overrun to account for the lost events. */ local_add(page_entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); } @@ -2367,11 +2372,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static __always_inline unsigned rb_page_commit(struct buffer_page *bpage) -{ - return local_read(&bpage->page->commit); -} - static struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { @@ -2517,7 +2517,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the counters. */ local_add(entries, &cpu_buffer->overrun); - local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes); local_inc(&cpu_buffer->pages_lost); /* @@ -2660,9 +2660,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); - /* account for padding bytes */ - local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); - /* * Save the original length to the meta data. * This will be used by the reader to add lost event @@ -2676,7 +2673,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, * write counter enough to allow another writer to slip * in on this page. * We put in a discarded commit instead, to make sure - * that this space is not used again. + * that this space is not used again, and this space will + * not be accounted into 'entries_bytes'. * * If we are less than the minimum size, we don't need to * worry about it. @@ -2701,6 +2699,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, /* time delta must be non zero */ event->time_delta = 1; + /* account for padding bytes */ + local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + /* Make sure the padding is visible before the tail_page->write update */ smp_wmb(); @@ -4215,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); /** - * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer + * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to read from. */ @@ -4723,6 +4724,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) length = rb_event_length(event); cpu_buffer->reader_page->read += length; + cpu_buffer->read_bytes += length; } static void rb_advance_iter(struct ring_buffer_iter *iter) @@ -5816,7 +5818,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); - cpu_buffer->read_bytes += BUF_PAGE_SIZE; + cpu_buffer->read_bytes += rb_page_commit(reader); /* swap the pages */ rb_init_page(bpage);

2 years

1
0
0 0

FAILED: patch "[PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y git checkout FETCH_HEAD git cherry-pick -x 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100431-ascertain-evade-5e53@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 Mon Sep 17 00:00:00 2001 From: Yang Shi <yang(a)os.amperecomputing.com> Date: Wed, 20 Sep 2023 15:32:42 -0700 Subject: [PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, kernel should attempt to migrate all existing pages, and return -EIO if there is misplaced or unmovable page. Then commit 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") messed up the return value and didn't break VMA scan early ianymore when MPOL_MF_STRICT alone. The return value problem was fixed by commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified"), but it broke the VMA walk early if unmovable page is met, it may cause some pages are not migrated as expected. The code should conceptually do: if (MPOL_MF_MOVE|MOVEALL) scan all vmas try to migrate the existing pages return success else if (MPOL_MF_MOVE* | MPOL_MF_STRICT) scan all vmas try to migrate the existing pages return -EIO if unmovable or migration failed else /* MPOL_MF_STRICT alone */ break early if meets unmovable and don't call mbind_range() at all else /* none of those flags */ check the ranges in test_walk, EFAULT without mbind_range() if discontig. Fixed the behavior. Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.… Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Suren Baghdasaryan <surenb(a)google.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Rafael Aquini <aquini(a)redhat.com> Cc: Kirill A. Shutemov <kirill(a)shutemov.name> Cc: David Rientjes <rientjes(a)google.com> Cc: <stable(a)vger.kernel.org> [4.9+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 42b5567e3773..f1b00d6ac7ee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -426,6 +426,7 @@ struct queue_pages { unsigned long start; unsigned long end; struct vm_area_struct *first; + bool has_unmovable; }; /* @@ -446,9 +447,8 @@ static inline bool queue_folio_required(struct folio *folio, /* * queue_folios_pmd() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. huge zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an * existing folio was already on a node that does not follow the * policy. @@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { if (!vma_migratable(walk->vma) || migrate_folio_add(folio, qp->pagelist, flags)) { - ret = 1; + qp->has_unmovable = true; goto unlock; } } else @@ -495,9 +495,8 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, * * queue_folios_pte_range() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already * on a node that does not follow the policy. */ @@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - bool has_unmovable = false; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; @@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, if (!queue_folio_required(folio, qp)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - /* MPOL_MF_STRICT must be specified if we get here */ - if (!vma_migratable(vma)) { - has_unmovable = true; - break; - } + /* + * MPOL_MF_STRICT must be specified if we get here. + * Continue walking vmas due to MPOL_MF_MOVE* flags. + */ + if (!vma_migratable(vma)) + qp->has_unmovable = true; /* * Do not abort immediately since there may be @@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, * need migrate other LRU pages. */ if (migrate_folio_add(folio, qp->pagelist, flags)) - has_unmovable = true; + qp->has_unmovable = true; } else break; } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); - if (has_unmovable) - return 1; - return addr != end ? -EIO : 0; } @@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Detecting misplaced folio but allow migrating folios which * have been queued. */ - ret = 1; + qp->has_unmovable = true; goto unlock; } @@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Failed to isolate folio but allow migrating pages * which have been queued. */ - ret = 1; + qp->has_unmovable = true; } unlock: spin_unlock(ptl); @@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, .start = start, .end = end, .first = NULL, + .has_unmovable = false, }; const struct mm_walk_ops *ops = lock_vma ? &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; err = walk_page_range(mm, start, end, ops, &qp); + if (qp.has_unmovable) + err = 1; if (!qp.first) /* whole range in hole */ err = -EFAULT; @@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_movable_pages(&pagelist); } - if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) + if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT)) err = -EIO; } else { up_out:

2 years

1
0
0 0

FAILED: patch "[PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100429-unwatched-blaspheme-2d9d@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 Mon Sep 17 00:00:00 2001 From: Yang Shi <yang(a)os.amperecomputing.com> Date: Wed, 20 Sep 2023 15:32:42 -0700 Subject: [PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, kernel should attempt to migrate all existing pages, and return -EIO if there is misplaced or unmovable page. Then commit 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") messed up the return value and didn't break VMA scan early ianymore when MPOL_MF_STRICT alone. The return value problem was fixed by commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified"), but it broke the VMA walk early if unmovable page is met, it may cause some pages are not migrated as expected. The code should conceptually do: if (MPOL_MF_MOVE|MOVEALL) scan all vmas try to migrate the existing pages return success else if (MPOL_MF_MOVE* | MPOL_MF_STRICT) scan all vmas try to migrate the existing pages return -EIO if unmovable or migration failed else /* MPOL_MF_STRICT alone */ break early if meets unmovable and don't call mbind_range() at all else /* none of those flags */ check the ranges in test_walk, EFAULT without mbind_range() if discontig. Fixed the behavior. Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.… Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Suren Baghdasaryan <surenb(a)google.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Rafael Aquini <aquini(a)redhat.com> Cc: Kirill A. Shutemov <kirill(a)shutemov.name> Cc: David Rientjes <rientjes(a)google.com> Cc: <stable(a)vger.kernel.org> [4.9+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 42b5567e3773..f1b00d6ac7ee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -426,6 +426,7 @@ struct queue_pages { unsigned long start; unsigned long end; struct vm_area_struct *first; + bool has_unmovable; }; /* @@ -446,9 +447,8 @@ static inline bool queue_folio_required(struct folio *folio, /* * queue_folios_pmd() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. huge zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an * existing folio was already on a node that does not follow the * policy. @@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { if (!vma_migratable(walk->vma) || migrate_folio_add(folio, qp->pagelist, flags)) { - ret = 1; + qp->has_unmovable = true; goto unlock; } } else @@ -495,9 +495,8 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, * * queue_folios_pte_range() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already * on a node that does not follow the policy. */ @@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - bool has_unmovable = false; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; @@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, if (!queue_folio_required(folio, qp)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - /* MPOL_MF_STRICT must be specified if we get here */ - if (!vma_migratable(vma)) { - has_unmovable = true; - break; - } + /* + * MPOL_MF_STRICT must be specified if we get here. + * Continue walking vmas due to MPOL_MF_MOVE* flags. + */ + if (!vma_migratable(vma)) + qp->has_unmovable = true; /* * Do not abort immediately since there may be @@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, * need migrate other LRU pages. */ if (migrate_folio_add(folio, qp->pagelist, flags)) - has_unmovable = true; + qp->has_unmovable = true; } else break; } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); - if (has_unmovable) - return 1; - return addr != end ? -EIO : 0; } @@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Detecting misplaced folio but allow migrating folios which * have been queued. */ - ret = 1; + qp->has_unmovable = true; goto unlock; } @@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Failed to isolate folio but allow migrating pages * which have been queued. */ - ret = 1; + qp->has_unmovable = true; } unlock: spin_unlock(ptl); @@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, .start = start, .end = end, .first = NULL, + .has_unmovable = false, }; const struct mm_walk_ops *ops = lock_vma ? &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; err = walk_page_range(mm, start, end, ops, &qp); + if (qp.has_unmovable) + err = 1; if (!qp.first) /* whole range in hole */ err = -EFAULT; @@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_movable_pages(&pagelist); } - if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) + if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT)) err = -EIO; } else { up_out:

2 years

1
0
0 0

FAILED: patch "[PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100427-urban-cognition-691b@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 Mon Sep 17 00:00:00 2001 From: Yang Shi <yang(a)os.amperecomputing.com> Date: Wed, 20 Sep 2023 15:32:42 -0700 Subject: [PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, kernel should attempt to migrate all existing pages, and return -EIO if there is misplaced or unmovable page. Then commit 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") messed up the return value and didn't break VMA scan early ianymore when MPOL_MF_STRICT alone. The return value problem was fixed by commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified"), but it broke the VMA walk early if unmovable page is met, it may cause some pages are not migrated as expected. The code should conceptually do: if (MPOL_MF_MOVE|MOVEALL) scan all vmas try to migrate the existing pages return success else if (MPOL_MF_MOVE* | MPOL_MF_STRICT) scan all vmas try to migrate the existing pages return -EIO if unmovable or migration failed else /* MPOL_MF_STRICT alone */ break early if meets unmovable and don't call mbind_range() at all else /* none of those flags */ check the ranges in test_walk, EFAULT without mbind_range() if discontig. Fixed the behavior. Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.… Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Suren Baghdasaryan <surenb(a)google.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Rafael Aquini <aquini(a)redhat.com> Cc: Kirill A. Shutemov <kirill(a)shutemov.name> Cc: David Rientjes <rientjes(a)google.com> Cc: <stable(a)vger.kernel.org> [4.9+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 42b5567e3773..f1b00d6ac7ee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -426,6 +426,7 @@ struct queue_pages { unsigned long start; unsigned long end; struct vm_area_struct *first; + bool has_unmovable; }; /* @@ -446,9 +447,8 @@ static inline bool queue_folio_required(struct folio *folio, /* * queue_folios_pmd() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. huge zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an * existing folio was already on a node that does not follow the * policy. @@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { if (!vma_migratable(walk->vma) || migrate_folio_add(folio, qp->pagelist, flags)) { - ret = 1; + qp->has_unmovable = true; goto unlock; } } else @@ -495,9 +495,8 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, * * queue_folios_pte_range() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already * on a node that does not follow the policy. */ @@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - bool has_unmovable = false; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; @@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, if (!queue_folio_required(folio, qp)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - /* MPOL_MF_STRICT must be specified if we get here */ - if (!vma_migratable(vma)) { - has_unmovable = true; - break; - } + /* + * MPOL_MF_STRICT must be specified if we get here. + * Continue walking vmas due to MPOL_MF_MOVE* flags. + */ + if (!vma_migratable(vma)) + qp->has_unmovable = true; /* * Do not abort immediately since there may be @@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, * need migrate other LRU pages. */ if (migrate_folio_add(folio, qp->pagelist, flags)) - has_unmovable = true; + qp->has_unmovable = true; } else break; } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); - if (has_unmovable) - return 1; - return addr != end ? -EIO : 0; } @@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Detecting misplaced folio but allow migrating folios which * have been queued. */ - ret = 1; + qp->has_unmovable = true; goto unlock; } @@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Failed to isolate folio but allow migrating pages * which have been queued. */ - ret = 1; + qp->has_unmovable = true; } unlock: spin_unlock(ptl); @@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, .start = start, .end = end, .first = NULL, + .has_unmovable = false, }; const struct mm_walk_ops *ops = lock_vma ? &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; err = walk_page_range(mm, start, end, ops, &qp); + if (qp.has_unmovable) + err = 1; if (!qp.first) /* whole range in hole */ err = -EFAULT; @@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_movable_pages(&pagelist); } - if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) + if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT)) err = -EIO; } else { up_out:

2 years

1
0
0 0

FAILED: patch "[PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100424-koala-ivory-c2e4@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 Mon Sep 17 00:00:00 2001 From: Yang Shi <yang(a)os.amperecomputing.com> Date: Wed, 20 Sep 2023 15:32:42 -0700 Subject: [PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, kernel should attempt to migrate all existing pages, and return -EIO if there is misplaced or unmovable page. Then commit 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") messed up the return value and didn't break VMA scan early ianymore when MPOL_MF_STRICT alone. The return value problem was fixed by commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified"), but it broke the VMA walk early if unmovable page is met, it may cause some pages are not migrated as expected. The code should conceptually do: if (MPOL_MF_MOVE|MOVEALL) scan all vmas try to migrate the existing pages return success else if (MPOL_MF_MOVE* | MPOL_MF_STRICT) scan all vmas try to migrate the existing pages return -EIO if unmovable or migration failed else /* MPOL_MF_STRICT alone */ break early if meets unmovable and don't call mbind_range() at all else /* none of those flags */ check the ranges in test_walk, EFAULT without mbind_range() if discontig. Fixed the behavior. Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.… Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Suren Baghdasaryan <surenb(a)google.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Rafael Aquini <aquini(a)redhat.com> Cc: Kirill A. Shutemov <kirill(a)shutemov.name> Cc: David Rientjes <rientjes(a)google.com> Cc: <stable(a)vger.kernel.org> [4.9+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 42b5567e3773..f1b00d6ac7ee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -426,6 +426,7 @@ struct queue_pages { unsigned long start; unsigned long end; struct vm_area_struct *first; + bool has_unmovable; }; /* @@ -446,9 +447,8 @@ static inline bool queue_folio_required(struct folio *folio, /* * queue_folios_pmd() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. huge zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an * existing folio was already on a node that does not follow the * policy. @@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { if (!vma_migratable(walk->vma) || migrate_folio_add(folio, qp->pagelist, flags)) { - ret = 1; + qp->has_unmovable = true; goto unlock; } } else @@ -495,9 +495,8 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, * * queue_folios_pte_range() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already * on a node that does not follow the policy. */ @@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - bool has_unmovable = false; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; @@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, if (!queue_folio_required(folio, qp)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - /* MPOL_MF_STRICT must be specified if we get here */ - if (!vma_migratable(vma)) { - has_unmovable = true; - break; - } + /* + * MPOL_MF_STRICT must be specified if we get here. + * Continue walking vmas due to MPOL_MF_MOVE* flags. + */ + if (!vma_migratable(vma)) + qp->has_unmovable = true; /* * Do not abort immediately since there may be @@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, * need migrate other LRU pages. */ if (migrate_folio_add(folio, qp->pagelist, flags)) - has_unmovable = true; + qp->has_unmovable = true; } else break; } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); - if (has_unmovable) - return 1; - return addr != end ? -EIO : 0; } @@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Detecting misplaced folio but allow migrating folios which * have been queued. */ - ret = 1; + qp->has_unmovable = true; goto unlock; } @@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Failed to isolate folio but allow migrating pages * which have been queued. */ - ret = 1; + qp->has_unmovable = true; } unlock: spin_unlock(ptl); @@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, .start = start, .end = end, .first = NULL, + .has_unmovable = false, }; const struct mm_walk_ops *ops = lock_vma ? &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; err = walk_page_range(mm, start, end, ops, &qp); + if (qp.has_unmovable) + err = 1; if (!qp.first) /* whole range in hole */ err = -EFAULT; @@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_movable_pages(&pagelist); } - if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) + if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT)) err = -EIO; } else { up_out:

2 years

1
0
0 0

FAILED: patch "[PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100422-twistable-umbrella-a71d@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 Mon Sep 17 00:00:00 2001 From: Yang Shi <yang(a)os.amperecomputing.com> Date: Wed, 20 Sep 2023 15:32:42 -0700 Subject: [PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, kernel should attempt to migrate all existing pages, and return -EIO if there is misplaced or unmovable page. Then commit 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") messed up the return value and didn't break VMA scan early ianymore when MPOL_MF_STRICT alone. The return value problem was fixed by commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified"), but it broke the VMA walk early if unmovable page is met, it may cause some pages are not migrated as expected. The code should conceptually do: if (MPOL_MF_MOVE|MOVEALL) scan all vmas try to migrate the existing pages return success else if (MPOL_MF_MOVE* | MPOL_MF_STRICT) scan all vmas try to migrate the existing pages return -EIO if unmovable or migration failed else /* MPOL_MF_STRICT alone */ break early if meets unmovable and don't call mbind_range() at all else /* none of those flags */ check the ranges in test_walk, EFAULT without mbind_range() if discontig. Fixed the behavior. Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.… Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Suren Baghdasaryan <surenb(a)google.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Rafael Aquini <aquini(a)redhat.com> Cc: Kirill A. Shutemov <kirill(a)shutemov.name> Cc: David Rientjes <rientjes(a)google.com> Cc: <stable(a)vger.kernel.org> [4.9+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 42b5567e3773..f1b00d6ac7ee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -426,6 +426,7 @@ struct queue_pages { unsigned long start; unsigned long end; struct vm_area_struct *first; + bool has_unmovable; }; /* @@ -446,9 +447,8 @@ static inline bool queue_folio_required(struct folio *folio, /* * queue_folios_pmd() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. huge zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an * existing folio was already on a node that does not follow the * policy. @@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { if (!vma_migratable(walk->vma) || migrate_folio_add(folio, qp->pagelist, flags)) { - ret = 1; + qp->has_unmovable = true; goto unlock; } } else @@ -495,9 +495,8 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, * * queue_folios_pte_range() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already * on a node that does not follow the policy. */ @@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - bool has_unmovable = false; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; @@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, if (!queue_folio_required(folio, qp)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - /* MPOL_MF_STRICT must be specified if we get here */ - if (!vma_migratable(vma)) { - has_unmovable = true; - break; - } + /* + * MPOL_MF_STRICT must be specified if we get here. + * Continue walking vmas due to MPOL_MF_MOVE* flags. + */ + if (!vma_migratable(vma)) + qp->has_unmovable = true; /* * Do not abort immediately since there may be @@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, * need migrate other LRU pages. */ if (migrate_folio_add(folio, qp->pagelist, flags)) - has_unmovable = true; + qp->has_unmovable = true; } else break; } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); - if (has_unmovable) - return 1; - return addr != end ? -EIO : 0; } @@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Detecting misplaced folio but allow migrating folios which * have been queued. */ - ret = 1; + qp->has_unmovable = true; goto unlock; } @@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Failed to isolate folio but allow migrating pages * which have been queued. */ - ret = 1; + qp->has_unmovable = true; } unlock: spin_unlock(ptl); @@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, .start = start, .end = end, .first = NULL, + .has_unmovable = false, }; const struct mm_walk_ops *ops = lock_vma ? &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; err = walk_page_range(mm, start, end, ops, &qp); + if (qp.has_unmovable) + err = 1; if (!qp.first) /* whole range in hole */ err = -EFAULT; @@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_movable_pages(&pagelist); } - if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) + if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT)) err = -EIO; } else { up_out:

2 years

1
0
0 0

FAILED: patch "[PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100420-grafting-raft-1e1b@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 24526268f4e38c9ec0c4a30de4f37ad2a2a84e47 Mon Sep 17 00:00:00 2001 From: Yang Shi <yang(a)os.amperecomputing.com> Date: Wed, 20 Sep 2023 15:32:42 -0700 Subject: [PATCH] mm: mempolicy: keep VMA walk if both MPOL_MF_STRICT and MPOL_MF_MOVE are specified When calling mbind() with MPOL_MF_{MOVE|MOVEALL} | MPOL_MF_STRICT, kernel should attempt to migrate all existing pages, and return -EIO if there is misplaced or unmovable page. Then commit 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") messed up the return value and didn't break VMA scan early ianymore when MPOL_MF_STRICT alone. The return value problem was fixed by commit a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified"), but it broke the VMA walk early if unmovable page is met, it may cause some pages are not migrated as expected. The code should conceptually do: if (MPOL_MF_MOVE|MOVEALL) scan all vmas try to migrate the existing pages return success else if (MPOL_MF_MOVE* | MPOL_MF_STRICT) scan all vmas try to migrate the existing pages return -EIO if unmovable or migration failed else /* MPOL_MF_STRICT alone */ break early if meets unmovable and don't call mbind_range() at all else /* none of those flags */ check the ranges in test_walk, EFAULT without mbind_range() if discontig. Fixed the behavior. Link: https://lkml.kernel.org/r/20230920223242.3425775-1-yang@os.amperecomputing.… Fixes: a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified") Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Suren Baghdasaryan <surenb(a)google.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Oscar Salvador <osalvador(a)suse.de> Cc: Rafael Aquini <aquini(a)redhat.com> Cc: Kirill A. Shutemov <kirill(a)shutemov.name> Cc: David Rientjes <rientjes(a)google.com> Cc: <stable(a)vger.kernel.org> [4.9+] Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 42b5567e3773..f1b00d6ac7ee 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -426,6 +426,7 @@ struct queue_pages { unsigned long start; unsigned long end; struct vm_area_struct *first; + bool has_unmovable; }; /* @@ -446,9 +447,8 @@ static inline bool queue_folio_required(struct folio *folio, /* * queue_folios_pmd() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. huge zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an * existing folio was already on a node that does not follow the * policy. @@ -479,7 +479,7 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { if (!vma_migratable(walk->vma) || migrate_folio_add(folio, qp->pagelist, flags)) { - ret = 1; + qp->has_unmovable = true; goto unlock; } } else @@ -495,9 +495,8 @@ static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, * * queue_folios_pte_range() has three possible return values: * 0 - folios are placed on the right node or queued successfully, or - * special page is met, i.e. zero page. - * 1 - there is unmovable folio, and MPOL_MF_MOVE* & MPOL_MF_STRICT were - * specified. + * special page is met, i.e. zero page, or unmovable page is found + * but continue walking (indicated by queue_pages.has_unmovable). * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already * on a node that does not follow the policy. */ @@ -508,7 +507,6 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, struct folio *folio; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - bool has_unmovable = false; pte_t *pte, *mapped_pte; pte_t ptent; spinlock_t *ptl; @@ -538,11 +536,12 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, if (!queue_folio_required(folio, qp)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - /* MPOL_MF_STRICT must be specified if we get here */ - if (!vma_migratable(vma)) { - has_unmovable = true; - break; - } + /* + * MPOL_MF_STRICT must be specified if we get here. + * Continue walking vmas due to MPOL_MF_MOVE* flags. + */ + if (!vma_migratable(vma)) + qp->has_unmovable = true; /* * Do not abort immediately since there may be @@ -550,16 +549,13 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, * need migrate other LRU pages. */ if (migrate_folio_add(folio, qp->pagelist, flags)) - has_unmovable = true; + qp->has_unmovable = true; } else break; } pte_unmap_unlock(mapped_pte, ptl); cond_resched(); - if (has_unmovable) - return 1; - return addr != end ? -EIO : 0; } @@ -599,7 +595,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Detecting misplaced folio but allow migrating folios which * have been queued. */ - ret = 1; + qp->has_unmovable = true; goto unlock; } @@ -620,7 +616,7 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, * Failed to isolate folio but allow migrating pages * which have been queued. */ - ret = 1; + qp->has_unmovable = true; } unlock: spin_unlock(ptl); @@ -756,12 +752,15 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, .start = start, .end = end, .first = NULL, + .has_unmovable = false, }; const struct mm_walk_ops *ops = lock_vma ? &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops; err = walk_page_range(mm, start, end, ops, &qp); + if (qp.has_unmovable) + err = 1; if (!qp.first) /* whole range in hole */ err = -EFAULT; @@ -1358,7 +1357,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_movable_pages(&pagelist); } - if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT))) + if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT)) err = -EIO; } else { up_out:

2 years

1
0
0 0

FAILED: patch "[PATCH] proc: nommu: /proc/<pid>/maps: release mmap read lock" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y git checkout FETCH_HEAD git cherry-pick -x 578d7699e5c2add8c2e9549d9d75dfb56c460cb3 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100409-shorten-demanding-15d2@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 578d7699e5c2add8c2e9549d9d75dfb56c460cb3 Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer <Ben.Wolsieffer(a)hefring.com> Date: Thu, 14 Sep 2023 12:30:20 -0400 Subject: [PATCH] proc: nommu: /proc/<pid>/maps: release mmap read lock The no-MMU implementation of /proc/<pid>/map doesn't normally release the mmap read lock, because it uses !IS_ERR_OR_NULL(_vml) to determine whether to release the lock. Since _vml is NULL when the end of the mappings is reached, the lock is not released. Reading /proc/1/maps twice doesn't cause a hang because it only takes the read lock, which can be taken multiple times and therefore doesn't show any problem if the lock isn't released. Instead, you need to perform some operation that attempts to take the write lock after reading /proc/<pid>/maps. To actually reproduce the bug, compile the following code as 'proc_maps_bug': #include <stdio.h> #include <unistd.h> #include <sys/mman.h> int main(int argc, char *argv[]) { void *buf; sleep(1); buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); puts("mmap returned"); return 0; } Then, run: ./proc_maps_bug &; cat /proc/$!/maps; fg Without this patch, mmap() will hang and the command will never complete. This code was incorrectly adapted from the MMU implementation, which at the time released the lock in m_next() before returning the last entry. The MMU implementation has diverged further from the no-MMU version since then, so this patch brings their locking and error handling into sync, fixing the bug and hopefully avoiding similar issues in the future. Link: https://lkml.kernel.org/r/20230914163019.4050530-2-ben.wolsieffer@hefring.c… Fixes: 47fecca15c09 ("fs/proc/task_nommu.c: don't use priv->task->mm") Signed-off-by: Ben Wolsieffer <ben.wolsieffer(a)hefring.com> Acked-by: Oleg Nesterov <oleg(a)redhat.com> Cc: Giulio Benetti <giulio.benetti(a)benettiengineering.com> Cc: Greg Ungerer <gerg(a)uclinux.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index a8ac0dd8041e..bc2e843f4810 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -192,11 +192,16 @@ static void *m_start(struct seq_file *m, loff_t *pos) return ERR_PTR(-ESRCH); mm = priv->mm; - if (!mm || !mmget_not_zero(mm)) + if (!mm || !mmget_not_zero(mm)) { + put_task_struct(priv->task); + priv->task = NULL; return NULL; + } if (mmap_read_lock_killable(mm)) { mmput(mm); + put_task_struct(priv->task); + priv->task = NULL; return ERR_PTR(-EINTR); } @@ -205,23 +210,21 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (vma) return vma; - mmap_read_unlock(mm); - mmput(mm); return NULL; } -static void m_stop(struct seq_file *m, void *_vml) +static void m_stop(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; + struct mm_struct *mm = priv->mm; - if (!IS_ERR_OR_NULL(_vml)) { - mmap_read_unlock(priv->mm); - mmput(priv->mm); - } - if (priv->task) { - put_task_struct(priv->task); - priv->task = NULL; - } + if (!priv->task) + return; + + mmap_read_unlock(mm); + mmput(mm); + put_task_struct(priv->task); + priv->task = NULL; } static void *m_next(struct seq_file *m, void *_p, loff_t *pos)

2 years

1
0
0 0

FAILED: patch "[PATCH] proc: nommu: /proc/<pid>/maps: release mmap read lock" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 578d7699e5c2add8c2e9549d9d75dfb56c460cb3 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100408-swab-depict-d818@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 578d7699e5c2add8c2e9549d9d75dfb56c460cb3 Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer <Ben.Wolsieffer(a)hefring.com> Date: Thu, 14 Sep 2023 12:30:20 -0400 Subject: [PATCH] proc: nommu: /proc/<pid>/maps: release mmap read lock The no-MMU implementation of /proc/<pid>/map doesn't normally release the mmap read lock, because it uses !IS_ERR_OR_NULL(_vml) to determine whether to release the lock. Since _vml is NULL when the end of the mappings is reached, the lock is not released. Reading /proc/1/maps twice doesn't cause a hang because it only takes the read lock, which can be taken multiple times and therefore doesn't show any problem if the lock isn't released. Instead, you need to perform some operation that attempts to take the write lock after reading /proc/<pid>/maps. To actually reproduce the bug, compile the following code as 'proc_maps_bug': #include <stdio.h> #include <unistd.h> #include <sys/mman.h> int main(int argc, char *argv[]) { void *buf; sleep(1); buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); puts("mmap returned"); return 0; } Then, run: ./proc_maps_bug &; cat /proc/$!/maps; fg Without this patch, mmap() will hang and the command will never complete. This code was incorrectly adapted from the MMU implementation, which at the time released the lock in m_next() before returning the last entry. The MMU implementation has diverged further from the no-MMU version since then, so this patch brings their locking and error handling into sync, fixing the bug and hopefully avoiding similar issues in the future. Link: https://lkml.kernel.org/r/20230914163019.4050530-2-ben.wolsieffer@hefring.c… Fixes: 47fecca15c09 ("fs/proc/task_nommu.c: don't use priv->task->mm") Signed-off-by: Ben Wolsieffer <ben.wolsieffer(a)hefring.com> Acked-by: Oleg Nesterov <oleg(a)redhat.com> Cc: Giulio Benetti <giulio.benetti(a)benettiengineering.com> Cc: Greg Ungerer <gerg(a)uclinux.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index a8ac0dd8041e..bc2e843f4810 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -192,11 +192,16 @@ static void *m_start(struct seq_file *m, loff_t *pos) return ERR_PTR(-ESRCH); mm = priv->mm; - if (!mm || !mmget_not_zero(mm)) + if (!mm || !mmget_not_zero(mm)) { + put_task_struct(priv->task); + priv->task = NULL; return NULL; + } if (mmap_read_lock_killable(mm)) { mmput(mm); + put_task_struct(priv->task); + priv->task = NULL; return ERR_PTR(-EINTR); } @@ -205,23 +210,21 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (vma) return vma; - mmap_read_unlock(mm); - mmput(mm); return NULL; } -static void m_stop(struct seq_file *m, void *_vml) +static void m_stop(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; + struct mm_struct *mm = priv->mm; - if (!IS_ERR_OR_NULL(_vml)) { - mmap_read_unlock(priv->mm); - mmput(priv->mm); - } - if (priv->task) { - put_task_struct(priv->task); - priv->task = NULL; - } + if (!priv->task) + return; + + mmap_read_unlock(mm); + mmput(mm); + put_task_struct(priv->task); + priv->task = NULL; } static void *m_next(struct seq_file *m, void *_p, loff_t *pos)

2 years

1
0
0 0

FAILED: patch "[PATCH] proc: nommu: /proc/<pid>/maps: release mmap read lock" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 578d7699e5c2add8c2e9549d9d75dfb56c460cb3 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100406-component-isolating-b700@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 578d7699e5c2add8c2e9549d9d75dfb56c460cb3 Mon Sep 17 00:00:00 2001 From: Ben Wolsieffer <Ben.Wolsieffer(a)hefring.com> Date: Thu, 14 Sep 2023 12:30:20 -0400 Subject: [PATCH] proc: nommu: /proc/<pid>/maps: release mmap read lock The no-MMU implementation of /proc/<pid>/map doesn't normally release the mmap read lock, because it uses !IS_ERR_OR_NULL(_vml) to determine whether to release the lock. Since _vml is NULL when the end of the mappings is reached, the lock is not released. Reading /proc/1/maps twice doesn't cause a hang because it only takes the read lock, which can be taken multiple times and therefore doesn't show any problem if the lock isn't released. Instead, you need to perform some operation that attempts to take the write lock after reading /proc/<pid>/maps. To actually reproduce the bug, compile the following code as 'proc_maps_bug': #include <stdio.h> #include <unistd.h> #include <sys/mman.h> int main(int argc, char *argv[]) { void *buf; sleep(1); buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); puts("mmap returned"); return 0; } Then, run: ./proc_maps_bug &; cat /proc/$!/maps; fg Without this patch, mmap() will hang and the command will never complete. This code was incorrectly adapted from the MMU implementation, which at the time released the lock in m_next() before returning the last entry. The MMU implementation has diverged further from the no-MMU version since then, so this patch brings their locking and error handling into sync, fixing the bug and hopefully avoiding similar issues in the future. Link: https://lkml.kernel.org/r/20230914163019.4050530-2-ben.wolsieffer@hefring.c… Fixes: 47fecca15c09 ("fs/proc/task_nommu.c: don't use priv->task->mm") Signed-off-by: Ben Wolsieffer <ben.wolsieffer(a)hefring.com> Acked-by: Oleg Nesterov <oleg(a)redhat.com> Cc: Giulio Benetti <giulio.benetti(a)benettiengineering.com> Cc: Greg Ungerer <gerg(a)uclinux.org> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index a8ac0dd8041e..bc2e843f4810 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -192,11 +192,16 @@ static void *m_start(struct seq_file *m, loff_t *pos) return ERR_PTR(-ESRCH); mm = priv->mm; - if (!mm || !mmget_not_zero(mm)) + if (!mm || !mmget_not_zero(mm)) { + put_task_struct(priv->task); + priv->task = NULL; return NULL; + } if (mmap_read_lock_killable(mm)) { mmput(mm); + put_task_struct(priv->task); + priv->task = NULL; return ERR_PTR(-EINTR); } @@ -205,23 +210,21 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (vma) return vma; - mmap_read_unlock(mm); - mmput(mm); return NULL; } -static void m_stop(struct seq_file *m, void *_vml) +static void m_stop(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; + struct mm_struct *mm = priv->mm; - if (!IS_ERR_OR_NULL(_vml)) { - mmap_read_unlock(priv->mm); - mmput(priv->mm); - } - if (priv->task) { - put_task_struct(priv->task); - priv->task = NULL; - } + if (!priv->task) + return; + + mmap_read_unlock(mm); + mmput(mm); + put_task_struct(priv->task); + priv->task = NULL; } static void *m_next(struct seq_file *m, void *_p, loff_t *pos)

2 years

1
0
0 0

FAILED: patch "[PATCH] NFSv4: Fix a state manager thread deadlock regression" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 956fd46f97d238032cb5fa4771cdaccc6e760f9a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100429-stonework-conceal-099d@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 956fd46f97d238032cb5fa4771cdaccc6e760f9a Mon Sep 17 00:00:00 2001 From: Trond Myklebust <trond.myklebust(a)hammerspace.com> Date: Sun, 24 Sep 2023 13:14:15 -0400 Subject: [PATCH] NFSv4: Fix a state manager thread deadlock regression Commit 4dc73c679114 reintroduces the deadlock that was fixed by commit aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because it prevents the setup of new threads to handle reboot recovery, while the older recovery thread is stuck returning delegations. Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled") Cc: stable(a)vger.kernel.org Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com> Signed-off-by: Anna Schumaker <Anna.Schumaker(a)Netapp.com> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3508d8238826..7016eaadf555 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10622,7 +10622,9 @@ static void nfs4_disable_swap(struct inode *inode) */ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - nfs4_schedule_state_manager(clp); + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + wake_up_var(&clp->cl_state); } static const struct inode_operations nfs4_dir_inode_operations = { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0bc160fbabec..9a5d911a7edc 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1209,16 +1209,26 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; + struct rpc_clnt *clnt = clp->cl_rpcclient; + bool swapon = false; - if (clp->cl_rpcclient->cl_shutdown) + if (clnt->cl_shutdown) return; set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); - if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { - wake_up_var(&clp->cl_state); - return; + + if (atomic_read(&clnt->cl_swapper)) { + swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, + &clp->cl_state); + if (!swapon) { + wake_up_var(&clp->cl_state); + return; + } } - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); + + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + return; + __module_get(THIS_MODULE); refcount_inc(&clp->cl_count); @@ -1235,8 +1245,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) __func__, PTR_ERR(task)); if (!nfs_client_init_is_complete(clp)) nfs_mark_client_ready(clp, PTR_ERR(task)); + if (swapon) + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs4_clear_state_manager_bit(clp); - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs_put_client(clp); module_put(THIS_MODULE); } @@ -2748,22 +2759,25 @@ static int nfs4_run_state_manager(void *ptr) allow_signal(SIGKILL); again: - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); nfs4_state_manager(clp); - if (atomic_read(&cl->cl_swapper)) { + + if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) && + !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) { wait_var_event_interruptible(&clp->cl_state, test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)); - if (atomic_read(&cl->cl_swapper) && - test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) + if (!atomic_read(&cl->cl_swapper)) + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + if (refcount_read(&clp->cl_count) > 1 && !signalled() && + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) goto again; /* Either no longer a swapper, or were signalled */ + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); } - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); if (refcount_read(&clp->cl_count) > 1 && !signalled() && test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && - !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state)) + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) goto again; nfs_put_client(clp);

2 years

1
0
0 0

FAILED: patch "[PATCH] NFSv4: Fix a state manager thread deadlock regression" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 956fd46f97d238032cb5fa4771cdaccc6e760f9a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100428-runaround-caliber-aaa3@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 956fd46f97d238032cb5fa4771cdaccc6e760f9a Mon Sep 17 00:00:00 2001 From: Trond Myklebust <trond.myklebust(a)hammerspace.com> Date: Sun, 24 Sep 2023 13:14:15 -0400 Subject: [PATCH] NFSv4: Fix a state manager thread deadlock regression Commit 4dc73c679114 reintroduces the deadlock that was fixed by commit aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because it prevents the setup of new threads to handle reboot recovery, while the older recovery thread is stuck returning delegations. Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled") Cc: stable(a)vger.kernel.org Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com> Signed-off-by: Anna Schumaker <Anna.Schumaker(a)Netapp.com> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3508d8238826..7016eaadf555 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10622,7 +10622,9 @@ static void nfs4_disable_swap(struct inode *inode) */ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - nfs4_schedule_state_manager(clp); + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + wake_up_var(&clp->cl_state); } static const struct inode_operations nfs4_dir_inode_operations = { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0bc160fbabec..9a5d911a7edc 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1209,16 +1209,26 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; + struct rpc_clnt *clnt = clp->cl_rpcclient; + bool swapon = false; - if (clp->cl_rpcclient->cl_shutdown) + if (clnt->cl_shutdown) return; set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); - if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { - wake_up_var(&clp->cl_state); - return; + + if (atomic_read(&clnt->cl_swapper)) { + swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, + &clp->cl_state); + if (!swapon) { + wake_up_var(&clp->cl_state); + return; + } } - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); + + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + return; + __module_get(THIS_MODULE); refcount_inc(&clp->cl_count); @@ -1235,8 +1245,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) __func__, PTR_ERR(task)); if (!nfs_client_init_is_complete(clp)) nfs_mark_client_ready(clp, PTR_ERR(task)); + if (swapon) + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs4_clear_state_manager_bit(clp); - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs_put_client(clp); module_put(THIS_MODULE); } @@ -2748,22 +2759,25 @@ static int nfs4_run_state_manager(void *ptr) allow_signal(SIGKILL); again: - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); nfs4_state_manager(clp); - if (atomic_read(&cl->cl_swapper)) { + + if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) && + !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) { wait_var_event_interruptible(&clp->cl_state, test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)); - if (atomic_read(&cl->cl_swapper) && - test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) + if (!atomic_read(&cl->cl_swapper)) + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + if (refcount_read(&clp->cl_count) > 1 && !signalled() && + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) goto again; /* Either no longer a swapper, or were signalled */ + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); } - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); if (refcount_read(&clp->cl_count) > 1 && !signalled() && test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && - !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state)) + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) goto again; nfs_put_client(clp);

2 years

1
0
0 0

FAILED: patch "[PATCH] NFSv4: Fix a state manager thread deadlock regression" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 956fd46f97d238032cb5fa4771cdaccc6e760f9a # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100427-labored-staff-0ab4@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 956fd46f97d238032cb5fa4771cdaccc6e760f9a Mon Sep 17 00:00:00 2001 From: Trond Myklebust <trond.myklebust(a)hammerspace.com> Date: Sun, 24 Sep 2023 13:14:15 -0400 Subject: [PATCH] NFSv4: Fix a state manager thread deadlock regression Commit 4dc73c679114 reintroduces the deadlock that was fixed by commit aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because it prevents the setup of new threads to handle reboot recovery, while the older recovery thread is stuck returning delegations. Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled") Cc: stable(a)vger.kernel.org Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com> Signed-off-by: Anna Schumaker <Anna.Schumaker(a)Netapp.com> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3508d8238826..7016eaadf555 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -10622,7 +10622,9 @@ static void nfs4_disable_swap(struct inode *inode) */ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; - nfs4_schedule_state_manager(clp); + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + wake_up_var(&clp->cl_state); } static const struct inode_operations nfs4_dir_inode_operations = { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0bc160fbabec..9a5d911a7edc 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1209,16 +1209,26 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; + struct rpc_clnt *clnt = clp->cl_rpcclient; + bool swapon = false; - if (clp->cl_rpcclient->cl_shutdown) + if (clnt->cl_shutdown) return; set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); - if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { - wake_up_var(&clp->cl_state); - return; + + if (atomic_read(&clnt->cl_swapper)) { + swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, + &clp->cl_state); + if (!swapon) { + wake_up_var(&clp->cl_state); + return; + } } - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); + + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) + return; + __module_get(THIS_MODULE); refcount_inc(&clp->cl_count); @@ -1235,8 +1245,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) __func__, PTR_ERR(task)); if (!nfs_client_init_is_complete(clp)) nfs_mark_client_ready(clp, PTR_ERR(task)); + if (swapon) + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs4_clear_state_manager_bit(clp); - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); nfs_put_client(clp); module_put(THIS_MODULE); } @@ -2748,22 +2759,25 @@ static int nfs4_run_state_manager(void *ptr) allow_signal(SIGKILL); again: - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); nfs4_state_manager(clp); - if (atomic_read(&cl->cl_swapper)) { + + if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) && + !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) { wait_var_event_interruptible(&clp->cl_state, test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)); - if (atomic_read(&cl->cl_swapper) && - test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) + if (!atomic_read(&cl->cl_swapper)) + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + if (refcount_read(&clp->cl_count) > 1 && !signalled() && + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) goto again; /* Either no longer a swapper, or were signalled */ + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); } - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); if (refcount_read(&clp->cl_count) > 1 && !signalled() && test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && - !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state)) + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) goto again; nfs_put_client(clp);

2 years

1
0
0 0

FAILED: patch "[PATCH] ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates" failed to apply to 6.5-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.5-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y git checkout FETCH_HEAD git cherry-pick -x e765886249c533e1bb5cbc3cd741bad677417312 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100450-shone-catchy-89d1@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From e765886249c533e1bb5cbc3cd741bad677417312 Mon Sep 17 00:00:00 2001 From: Sameer Pujar <spujar(a)nvidia.com> Date: Thu, 7 Sep 2023 20:32:25 +0530 Subject: [PATCH] ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates Tegra audio graph card has many DAI links which connects internal AHUB modules and external audio codecs. Since these are DPCM links, hw_params() call in the machine driver happens for each connected BE link and PLLA is updated every time. This is not really needed for all links as only I/O link DAIs derive respective clocks from PLLA_OUT0 and thus from PLLA. Hence add checks to limit the clock updates to DAIs over I/O links. This found to be fixing a DMIC clock discrepancy which is suspected to happen because of back to back quick PLLA and PLLA_OUT0 rate updates. This was observed on Jetson TX2 platform where DMIC clock ended up with unexpected value. Fixes: 202e2f774543 ("ASoC: tegra: Add audio graph based card driver") Cc: stable(a)vger.kernel.org Signed-off-by: Sameer Pujar <spujar(a)nvidia.com> Link: https://lore.kernel.org/r/1694098945-32760-3-git-send-email-spujar@nvidia.c… Signed-off-by: Mark Brown <broonie(a)kernel.org> diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c index 1f2c5018bf5a..4737e776d383 100644 --- a/sound/soc/tegra/tegra_audio_graph_card.c +++ b/sound/soc/tegra/tegra_audio_graph_card.c @@ -10,6 +10,7 @@ #include <linux/platform_device.h> #include <sound/graph_card.h> #include <sound/pcm_params.h> +#include <sound/soc-dai.h> #define MAX_PLLA_OUT0_DIV 128 @@ -44,6 +45,21 @@ struct tegra_audio_cdata { unsigned int plla_out0_rates[NUM_RATE_TYPE]; }; +static bool need_clk_update(struct snd_soc_dai *dai) +{ + if (snd_soc_dai_is_dummy(dai) || + !dai->driver->ops || + !dai->driver->name) + return false; + + if (strstr(dai->driver->name, "I2S") || + strstr(dai->driver->name, "DMIC") || + strstr(dai->driver->name, "DSPK")) + return true; + + return false; +} + /* Setup PLL clock as per the given sample rate */ static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); int err; - /* - * This gets called for each DAI link (FE or BE) when DPCM is used. - * We may not want to update PLLA rate for each call. So PLLA update - * must be restricted to external I/O links (I2S, DMIC or DSPK) since - * they actually depend on it. I/O modules update their clocks in - * hw_param() of their respective component driver and PLLA rate - * update here helps them to derive appropriate rates. - * - * TODO: When more HW accelerators get added (like sample rate - * converter, volume gain controller etc., which don't really - * depend on PLLA) we need a better way to filter here. - */ - if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) { + if (need_clk_update(cpu_dai)) { err = tegra_audio_graph_update_pll(substream, params); if (err) return err;

2 years

1
0
0 0

RE: Money20/20 USA in Las Vegas Attendees Data-List 2023

by Mary Liyana

Hi, Would you be interested in acquiring Money20/20 USA Email List? Number of Contacts: 11,542 Cost: $ 1,687 Interested? Email me back; I would love to provide more information on the list. Kind Regards, Mary Liyana Marketing Coordinator

2 years

1
0
0 0

FAILED: patch "[PATCH] io_uring/fs: remove sqe->rw_flags checking from LINKAT" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x a52d4f657568d6458e873f74a9602e022afe666f # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100446-broiler-liquid-20a4@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From a52d4f657568d6458e873f74a9602e022afe666f Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe(a)kernel.dk> Date: Thu, 28 Sep 2023 09:23:27 -0600 Subject: [PATCH] io_uring/fs: remove sqe->rw_flags checking from LINKAT This is unionized with the actual link flags, so they can of course be set and they will be evaluated further down. If not we fail any LINKAT that has to set option flags. Fixes: cf30da90bc3a ("io_uring: add support for IORING_OP_LINKAT") Cc: stable(a)vger.kernel.org Reported-by: Thomas Leonard <talex5(a)gmail.com> Link: https://github.com/axboe/liburing/issues/955 Signed-off-by: Jens Axboe <axboe(a)kernel.dk> diff --git a/io_uring/fs.c b/io_uring/fs.c index f6a69a549fd4..08e3b175469c 100644 --- a/io_uring/fs.c +++ b/io_uring/fs.c @@ -243,7 +243,7 @@ int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link); const char __user *oldf, *newf; - if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) + if (sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF;

2 years

3
2
0 0

FAILED: patch "[PATCH] ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x e765886249c533e1bb5cbc3cd741bad677417312 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100451-matchless-aching-f16c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From e765886249c533e1bb5cbc3cd741bad677417312 Mon Sep 17 00:00:00 2001 From: Sameer Pujar <spujar(a)nvidia.com> Date: Thu, 7 Sep 2023 20:32:25 +0530 Subject: [PATCH] ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates Tegra audio graph card has many DAI links which connects internal AHUB modules and external audio codecs. Since these are DPCM links, hw_params() call in the machine driver happens for each connected BE link and PLLA is updated every time. This is not really needed for all links as only I/O link DAIs derive respective clocks from PLLA_OUT0 and thus from PLLA. Hence add checks to limit the clock updates to DAIs over I/O links. This found to be fixing a DMIC clock discrepancy which is suspected to happen because of back to back quick PLLA and PLLA_OUT0 rate updates. This was observed on Jetson TX2 platform where DMIC clock ended up with unexpected value. Fixes: 202e2f774543 ("ASoC: tegra: Add audio graph based card driver") Cc: stable(a)vger.kernel.org Signed-off-by: Sameer Pujar <spujar(a)nvidia.com> Link: https://lore.kernel.org/r/1694098945-32760-3-git-send-email-spujar@nvidia.c… Signed-off-by: Mark Brown <broonie(a)kernel.org> diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c index 1f2c5018bf5a..4737e776d383 100644 --- a/sound/soc/tegra/tegra_audio_graph_card.c +++ b/sound/soc/tegra/tegra_audio_graph_card.c @@ -10,6 +10,7 @@ #include <linux/platform_device.h> #include <sound/graph_card.h> #include <sound/pcm_params.h> +#include <sound/soc-dai.h> #define MAX_PLLA_OUT0_DIV 128 @@ -44,6 +45,21 @@ struct tegra_audio_cdata { unsigned int plla_out0_rates[NUM_RATE_TYPE]; }; +static bool need_clk_update(struct snd_soc_dai *dai) +{ + if (snd_soc_dai_is_dummy(dai) || + !dai->driver->ops || + !dai->driver->name) + return false; + + if (strstr(dai->driver->name, "I2S") || + strstr(dai->driver->name, "DMIC") || + strstr(dai->driver->name, "DSPK")) + return true; + + return false; +} + /* Setup PLL clock as per the given sample rate */ static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); int err; - /* - * This gets called for each DAI link (FE or BE) when DPCM is used. - * We may not want to update PLLA rate for each call. So PLLA update - * must be restricted to external I/O links (I2S, DMIC or DSPK) since - * they actually depend on it. I/O modules update their clocks in - * hw_param() of their respective component driver and PLLA rate - * update here helps them to derive appropriate rates. - * - * TODO: When more HW accelerators get added (like sample rate - * converter, volume gain controller etc., which don't really - * depend on PLLA) we need a better way to filter here. - */ - if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) { + if (need_clk_update(cpu_dai)) { err = tegra_audio_graph_update_pll(substream, params); if (err) return err;

2 years

1
0
0 0

FAILED: patch "[PATCH] ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x e765886249c533e1bb5cbc3cd741bad677417312 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100420-corned-elm-de4b@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From e765886249c533e1bb5cbc3cd741bad677417312 Mon Sep 17 00:00:00 2001 From: Sameer Pujar <spujar(a)nvidia.com> Date: Thu, 7 Sep 2023 20:32:25 +0530 Subject: [PATCH] ASoC: tegra: Fix redundant PLLA and PLLA_OUT0 updates Tegra audio graph card has many DAI links which connects internal AHUB modules and external audio codecs. Since these are DPCM links, hw_params() call in the machine driver happens for each connected BE link and PLLA is updated every time. This is not really needed for all links as only I/O link DAIs derive respective clocks from PLLA_OUT0 and thus from PLLA. Hence add checks to limit the clock updates to DAIs over I/O links. This found to be fixing a DMIC clock discrepancy which is suspected to happen because of back to back quick PLLA and PLLA_OUT0 rate updates. This was observed on Jetson TX2 platform where DMIC clock ended up with unexpected value. Fixes: 202e2f774543 ("ASoC: tegra: Add audio graph based card driver") Cc: stable(a)vger.kernel.org Signed-off-by: Sameer Pujar <spujar(a)nvidia.com> Link: https://lore.kernel.org/r/1694098945-32760-3-git-send-email-spujar@nvidia.c… Signed-off-by: Mark Brown <broonie(a)kernel.org> diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c index 1f2c5018bf5a..4737e776d383 100644 --- a/sound/soc/tegra/tegra_audio_graph_card.c +++ b/sound/soc/tegra/tegra_audio_graph_card.c @@ -10,6 +10,7 @@ #include <linux/platform_device.h> #include <sound/graph_card.h> #include <sound/pcm_params.h> +#include <sound/soc-dai.h> #define MAX_PLLA_OUT0_DIV 128 @@ -44,6 +45,21 @@ struct tegra_audio_cdata { unsigned int plla_out0_rates[NUM_RATE_TYPE]; }; +static bool need_clk_update(struct snd_soc_dai *dai) +{ + if (snd_soc_dai_is_dummy(dai) || + !dai->driver->ops || + !dai->driver->name) + return false; + + if (strstr(dai->driver->name, "I2S") || + strstr(dai->driver->name, "DMIC") || + strstr(dai->driver->name, "DSPK")) + return true; + + return false; +} + /* Setup PLL clock as per the given sample rate */ static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) @@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); int err; - /* - * This gets called for each DAI link (FE or BE) when DPCM is used. - * We may not want to update PLLA rate for each call. So PLLA update - * must be restricted to external I/O links (I2S, DMIC or DSPK) since - * they actually depend on it. I/O modules update their clocks in - * hw_param() of their respective component driver and PLLA rate - * update here helps them to derive appropriate rates. - * - * TODO: When more HW accelerators get added (like sample rate - * converter, volume gain controller etc., which don't really - * depend on PLLA) we need a better way to filter here. - */ - if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) { + if (need_clk_update(cpu_dai)) { err = tegra_audio_graph_update_pll(substream, params); if (err) return err;

2 years

1
0
0 0

FAILED: patch "[PATCH] ALSA: hda/realtek - ALC287 Realtek I2S speaker platform" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 41b07476da38ac2878a14e5b8fe0312c41ea36e3 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100456-acclaim-object-ca91@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 41b07476da38ac2878a14e5b8fe0312c41ea36e3 Mon Sep 17 00:00:00 2001 From: Kailang Yang <kailang(a)realtek.com> Date: Tue, 19 Sep 2023 16:27:16 +0800 Subject: [PATCH] ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support New platform SSID:0x231f. 0x17 was only speaker pin, DAC assigned will be 0x03. Headphone assigned to 0x02. Playback via headphone will get EQ filter processing. So, it needs to swap DAC. Signed-off-by: Kailang Yang <kailang(a)realtek.com> Cc: <stable(a)vger.kernel.org> Link: https://lore.kernel.org/r/8d63c6e360124e3ea2523753050e6f05@realtek.com Signed-off-by: Takashi Iwai <tiwai(a)suse.de> diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 883a7e865bc5..751783f3a15c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10577,6 +10577,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x17, 0x90170110}, {0x19, 0x03a11030}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK, + {0x17, 0x90170110}, /* 0x231f with RTK I2S AMP */ + {0x19, 0x04a11040}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE, {0x12, 0x90a60130}, {0x17, 0x90170110},

2 years

1
0
0 0

FAILED: patch "[PATCH] ALSA: hda/realtek - ALC287 Realtek I2S speaker platform" failed to apply to 6.5-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.5-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y git checkout FETCH_HEAD git cherry-pick -x 41b07476da38ac2878a14e5b8fe0312c41ea36e3 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100451-alarm-grasp-7872@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 41b07476da38ac2878a14e5b8fe0312c41ea36e3 Mon Sep 17 00:00:00 2001 From: Kailang Yang <kailang(a)realtek.com> Date: Tue, 19 Sep 2023 16:27:16 +0800 Subject: [PATCH] ALSA: hda/realtek - ALC287 Realtek I2S speaker platform support New platform SSID:0x231f. 0x17 was only speaker pin, DAC assigned will be 0x03. Headphone assigned to 0x02. Playback via headphone will get EQ filter processing. So, it needs to swap DAC. Signed-off-by: Kailang Yang <kailang(a)realtek.com> Cc: <stable(a)vger.kernel.org> Link: https://lore.kernel.org/r/8d63c6e360124e3ea2523753050e6f05@realtek.com Signed-off-by: Takashi Iwai <tiwai(a)suse.de> diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 883a7e865bc5..751783f3a15c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10577,6 +10577,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x17, 0x90170110}, {0x19, 0x03a11030}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK, + {0x17, 0x90170110}, /* 0x231f with RTK I2S AMP */ + {0x19, 0x04a11040}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE, {0x12, 0x90a60130}, {0x17, 0x90170110},

2 years

1
0
0 0

FAILED: patch "[PATCH] kernel/sched: Modify initial boot task idle setup" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y git checkout FETCH_HEAD git cherry-pick -x cff9b2332ab762b7e0586c793c431a8f2ea4db04 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100455-astrology-overcrowd-10e9@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From cff9b2332ab762b7e0586c793c431a8f2ea4db04 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com> Date: Fri, 15 Sep 2023 13:44:44 -0400 Subject: [PATCH] kernel/sched: Modify initial boot task idle setup Initial booting is setting the task flag to idle (PF_IDLE) by the call path sched_init() -> init_idle(). Having the task idle and calling call_rcu() in kernel/rcu/tiny.c means that TIF_NEED_RESCHED will be set. Subsequent calls to any cond_resched() will enable IRQs, potentially earlier than the IRQ setup has completed. Recent changes have caused just this scenario and IRQs have been enabled early. This causes a warning later in start_kernel() as interrupts are enabled before they are fully set up. Fix this issue by setting the PF_IDLE flag later in the boot sequence. Although the boot task was marked as idle since (at least) d80e4fda576d, I am not sure that it is wrong to do so. The forced context-switch on idle task was introduced in the tiny_rcu update, so I'm going to claim this fixes 5f6130fa52ee. Fixes: 5f6130fa52ee ("tiny_rcu: Directly force QS when call_rcu_[bh|sched]() on idle_task") Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com> Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Cc: stable(a)vger.kernel.org Link: https://lore.kernel.org/linux-mm/CAMuHMdWpvpWoDa=Ox-do92czYRvkok6_x6pYUH+Zo… diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2299a5cfbfb9..802551e0009b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9269,7 +9269,7 @@ void __init init_idle(struct task_struct *idle, int cpu) * PF_KTHREAD should already be set at this point; regardless, make it * look like a proper per-CPU kthread. */ - idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; + idle->flags |= PF_KTHREAD | PF_NO_SETAFFINITY; kthread_set_per_cpu(idle, cpu); #ifdef CONFIG_SMP diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 342f58a329f5..5007b25c5bc6 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -373,6 +373,7 @@ EXPORT_SYMBOL_GPL(play_idle_precise); void cpu_startup_entry(enum cpuhp_state state) { + current->flags |= PF_IDLE; arch_cpu_idle_prepare(); cpuhp_online_idle(state); while (1)

2 years

1
0
0 0

FAILED: patch "[PATCH] kernel/sched: Modify initial boot task idle setup" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x cff9b2332ab762b7e0586c793c431a8f2ea4db04 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100454-crawlers-crinkle-fea4@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From cff9b2332ab762b7e0586c793c431a8f2ea4db04 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com> Date: Fri, 15 Sep 2023 13:44:44 -0400 Subject: [PATCH] kernel/sched: Modify initial boot task idle setup Initial booting is setting the task flag to idle (PF_IDLE) by the call path sched_init() -> init_idle(). Having the task idle and calling call_rcu() in kernel/rcu/tiny.c means that TIF_NEED_RESCHED will be set. Subsequent calls to any cond_resched() will enable IRQs, potentially earlier than the IRQ setup has completed. Recent changes have caused just this scenario and IRQs have been enabled early. This causes a warning later in start_kernel() as interrupts are enabled before they are fully set up. Fix this issue by setting the PF_IDLE flag later in the boot sequence. Although the boot task was marked as idle since (at least) d80e4fda576d, I am not sure that it is wrong to do so. The forced context-switch on idle task was introduced in the tiny_rcu update, so I'm going to claim this fixes 5f6130fa52ee. Fixes: 5f6130fa52ee ("tiny_rcu: Directly force QS when call_rcu_[bh|sched]() on idle_task") Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com> Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Cc: stable(a)vger.kernel.org Link: https://lore.kernel.org/linux-mm/CAMuHMdWpvpWoDa=Ox-do92czYRvkok6_x6pYUH+Zo… diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2299a5cfbfb9..802551e0009b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9269,7 +9269,7 @@ void __init init_idle(struct task_struct *idle, int cpu) * PF_KTHREAD should already be set at this point; regardless, make it * look like a proper per-CPU kthread. */ - idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; + idle->flags |= PF_KTHREAD | PF_NO_SETAFFINITY; kthread_set_per_cpu(idle, cpu); #ifdef CONFIG_SMP diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 342f58a329f5..5007b25c5bc6 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -373,6 +373,7 @@ EXPORT_SYMBOL_GPL(play_idle_precise); void cpu_startup_entry(enum cpuhp_state state) { + current->flags |= PF_IDLE; arch_cpu_idle_prepare(); cpuhp_online_idle(state); while (1)

2 years

1
0
0 0

FAILED: patch "[PATCH] kernel/sched: Modify initial boot task idle setup" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x cff9b2332ab762b7e0586c793c431a8f2ea4db04 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100453-company-budget-dc97@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From cff9b2332ab762b7e0586c793c431a8f2ea4db04 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com> Date: Fri, 15 Sep 2023 13:44:44 -0400 Subject: [PATCH] kernel/sched: Modify initial boot task idle setup Initial booting is setting the task flag to idle (PF_IDLE) by the call path sched_init() -> init_idle(). Having the task idle and calling call_rcu() in kernel/rcu/tiny.c means that TIF_NEED_RESCHED will be set. Subsequent calls to any cond_resched() will enable IRQs, potentially earlier than the IRQ setup has completed. Recent changes have caused just this scenario and IRQs have been enabled early. This causes a warning later in start_kernel() as interrupts are enabled before they are fully set up. Fix this issue by setting the PF_IDLE flag later in the boot sequence. Although the boot task was marked as idle since (at least) d80e4fda576d, I am not sure that it is wrong to do so. The forced context-switch on idle task was introduced in the tiny_rcu update, so I'm going to claim this fixes 5f6130fa52ee. Fixes: 5f6130fa52ee ("tiny_rcu: Directly force QS when call_rcu_[bh|sched]() on idle_task") Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com> Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Cc: stable(a)vger.kernel.org Link: https://lore.kernel.org/linux-mm/CAMuHMdWpvpWoDa=Ox-do92czYRvkok6_x6pYUH+Zo… diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2299a5cfbfb9..802551e0009b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9269,7 +9269,7 @@ void __init init_idle(struct task_struct *idle, int cpu) * PF_KTHREAD should already be set at this point; regardless, make it * look like a proper per-CPU kthread. */ - idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; + idle->flags |= PF_KTHREAD | PF_NO_SETAFFINITY; kthread_set_per_cpu(idle, cpu); #ifdef CONFIG_SMP diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 342f58a329f5..5007b25c5bc6 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -373,6 +373,7 @@ EXPORT_SYMBOL_GPL(play_idle_precise); void cpu_startup_entry(enum cpuhp_state state) { + current->flags |= PF_IDLE; arch_cpu_idle_prepare(); cpuhp_online_idle(state); while (1)

2 years

1
0
0 0

FAILED: patch "[PATCH] kernel/sched: Modify initial boot task idle setup" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x cff9b2332ab762b7e0586c793c431a8f2ea4db04 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100451-cough-encrypt-1d2e@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From cff9b2332ab762b7e0586c793c431a8f2ea4db04 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com> Date: Fri, 15 Sep 2023 13:44:44 -0400 Subject: [PATCH] kernel/sched: Modify initial boot task idle setup Initial booting is setting the task flag to idle (PF_IDLE) by the call path sched_init() -> init_idle(). Having the task idle and calling call_rcu() in kernel/rcu/tiny.c means that TIF_NEED_RESCHED will be set. Subsequent calls to any cond_resched() will enable IRQs, potentially earlier than the IRQ setup has completed. Recent changes have caused just this scenario and IRQs have been enabled early. This causes a warning later in start_kernel() as interrupts are enabled before they are fully set up. Fix this issue by setting the PF_IDLE flag later in the boot sequence. Although the boot task was marked as idle since (at least) d80e4fda576d, I am not sure that it is wrong to do so. The forced context-switch on idle task was introduced in the tiny_rcu update, so I'm going to claim this fixes 5f6130fa52ee. Fixes: 5f6130fa52ee ("tiny_rcu: Directly force QS when call_rcu_[bh|sched]() on idle_task") Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com> Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Cc: stable(a)vger.kernel.org Link: https://lore.kernel.org/linux-mm/CAMuHMdWpvpWoDa=Ox-do92czYRvkok6_x6pYUH+Zo… diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2299a5cfbfb9..802551e0009b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9269,7 +9269,7 @@ void __init init_idle(struct task_struct *idle, int cpu) * PF_KTHREAD should already be set at this point; regardless, make it * look like a proper per-CPU kthread. */ - idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; + idle->flags |= PF_KTHREAD | PF_NO_SETAFFINITY; kthread_set_per_cpu(idle, cpu); #ifdef CONFIG_SMP diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 342f58a329f5..5007b25c5bc6 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -373,6 +373,7 @@ EXPORT_SYMBOL_GPL(play_idle_precise); void cpu_startup_entry(enum cpuhp_state state) { + current->flags |= PF_IDLE; arch_cpu_idle_prepare(); cpuhp_online_idle(state); while (1)

2 years

1
0
0 0

FAILED: patch "[PATCH] scsi: Do not attempt to rescan suspended devices" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x ff48b37802e5c134e2dfc4d091f10b2eb5065a72 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100455-impulse-shucking-f603@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From ff48b37802e5c134e2dfc4d091f10b2eb5065a72 Mon Sep 17 00:00:00 2001 From: Damien Le Moal <dlemoal(a)kernel.org> Date: Fri, 15 Sep 2023 15:00:13 +0900 Subject: [PATCH] scsi: Do not attempt to rescan suspended devices scsi_rescan_device() takes a scsi device lock before executing a device handler and device driver rescan methods. Waiting for the completion of any command issued to the device by these methods will thus be done with the device lock held. As a result, there is a risk of deadlocking within the power management code if scsi_rescan_device() is called to handle a device resume with the associated scsi device not yet resumed. Avoid such situation by checking that the target scsi device is in the running state, that is, fully capable of executing commands, before proceeding with the rescan and bailout returning -EWOULDBLOCK otherwise. With this error return, the caller can retry rescaning the device after a delay. The state check is done with the device lock held and is thus safe against incoming suspend power management operations. Fixes: 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after device resume") Cc: stable(a)vger.kernel.org Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org> Reviewed-by: Hannes Reinecke <hare(a)suse.de> Reviewed-by: Niklas Cassel <niklas.cassel(a)wdc.com> Tested-by: Geert Uytterhoeven <geert+renesas(a)glider.be> Reviewed-by: Martin K. Petersen <martin.petersen(a)oracle.com> Reviewed-by: Bart Van Assche <bvanassche(a)acm.org> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 52014b2d39e1..3db4d31a03a1 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1619,12 +1619,24 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, } EXPORT_SYMBOL(scsi_add_device); -void scsi_rescan_device(struct scsi_device *sdev) +int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; + int ret = 0; device_lock(dev); + /* + * Bail out if the device is not running. Otherwise, the rescan may + * block waiting for commands to be executed, with us holding the + * device lock. This can result in a potential deadlock in the power + * management core code when system resume is on-going. + */ + if (sdev->sdev_state != SDEV_RUNNING) { + ret = -EWOULDBLOCK; + goto unlock; + } + scsi_attach_vpd(sdev); scsi_cdl_check(sdev); @@ -1638,7 +1650,11 @@ void scsi_rescan_device(struct scsi_device *sdev) drv->rescan(dev); module_put(dev->driver->owner); } + +unlock: device_unlock(dev); + + return ret; } EXPORT_SYMBOL(scsi_rescan_device); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 49f768d0ff37..4c2dc8150c6d 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -764,7 +764,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht); #define scsi_template_proc_dir(sht) NULL #endif extern void scsi_scan_host(struct Scsi_Host *); -extern void scsi_rescan_device(struct scsi_device *); +extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); extern int scsi_host_busy(struct Scsi_Host *shost);

2 years

1
0
0 0

FAILED: patch "[PATCH] ata: libata-scsi: Fix delayed scsi_rescan_device() execution" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 8b4d9469d0b0e553208ee6f62f2807111fde18b9 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100444-gown-footsore-2fd8@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 8b4d9469d0b0e553208ee6f62f2807111fde18b9 Mon Sep 17 00:00:00 2001 From: Damien Le Moal <dlemoal(a)kernel.org> Date: Tue, 5 Sep 2023 09:06:23 +0900 Subject: [PATCH] ata: libata-scsi: Fix delayed scsi_rescan_device() execution Commit 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after device resume") modified ata_scsi_dev_rescan() to check the scsi device "is_suspended" power field to ensure that the scsi device associated with an ATA device is fully resumed when scsi_rescan_device() is executed. However, this fix is problematic as: 1) It relies on a PM internal field that should not be used without PM device locking protection. 2) The check for is_suspended and the call to scsi_rescan_device() are not atomic and a suspend PM event may be triggered between them, casuing scsi_rescan_device() to be called on a suspended device and in that function blocking while holding the scsi device lock. This would deadlock a following resume operation. These problems can trigger PM deadlocks on resume, especially with resume operations triggered quickly after or during suspend operations. E.g., a simple bash script like: for (( i=0; i<10; i++ )); do echo "+2 > /sys/class/rtc/rtc0/wakealarm echo mem > /sys/power/state done that triggers a resume 2 seconds after starting suspending a system can quickly lead to a PM deadlock preventing the system from correctly resuming. Fix this by replacing the check on is_suspended with a check on the return value given by scsi_rescan_device() as that function will fail if called against a suspended device. Also make sure rescan tasks already scheduled are first cancelled before suspending an ata port. Fixes: 6aa0365a3c85 ("ata: libata-scsi: Avoid deadlock on rescan after device resume") Cc: stable(a)vger.kernel.org Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org> Reviewed-by: Hannes Reinecke <hare(a)suse.de> Reviewed-by: Niklas Cassel <niklas.cassel(a)wdc.com> Tested-by: Geert Uytterhoeven <geert+renesas(a)glider.be> Reviewed-by: Martin K. Petersen <martin.petersen(a)oracle.com> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index a0bc01606b30..092372334e92 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5168,11 +5168,27 @@ static const unsigned int ata_port_suspend_ehi = ATA_EHI_QUIET static void ata_port_suspend(struct ata_port *ap, pm_message_t mesg) { + /* + * We are about to suspend the port, so we do not care about + * scsi_rescan_device() calls scheduled by previous resume operations. + * The next resume will schedule the rescan again. So cancel any rescan + * that is not done yet. + */ + cancel_delayed_work_sync(&ap->scsi_rescan_task); + ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, false); } static void ata_port_suspend_async(struct ata_port *ap, pm_message_t mesg) { + /* + * We are about to suspend the port, so we do not care about + * scsi_rescan_device() calls scheduled by previous resume operations. + * The next resume will schedule the rescan again. So cancel any rescan + * that is not done yet. + */ + cancel_delayed_work_sync(&ap->scsi_rescan_task); + ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, true); } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a0e58d22d222..6850cac803c1 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4756,7 +4756,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_link *link; struct ata_device *dev; unsigned long flags; - bool delay_rescan = false; + int ret = 0; mutex_lock(&ap->scsi_scan_mutex); spin_lock_irqsave(ap->lock, flags); @@ -4765,37 +4765,34 @@ void ata_scsi_dev_rescan(struct work_struct *work) ata_for_each_dev(dev, link, ENABLED) { struct scsi_device *sdev = dev->sdev; + /* + * If the port was suspended before this was scheduled, + * bail out. + */ + if (ap->pflags & ATA_PFLAG_SUSPENDED) + goto unlock; + if (!sdev) continue; if (scsi_device_get(sdev)) continue; - /* - * If the rescan work was scheduled because of a resume - * event, the port is already fully resumed, but the - * SCSI device may not yet be fully resumed. In such - * case, executing scsi_rescan_device() may cause a - * deadlock with the PM code on device_lock(). Prevent - * this by giving up and retrying rescan after a short - * delay. - */ - delay_rescan = sdev->sdev_gendev.power.is_suspended; - if (delay_rescan) { - scsi_device_put(sdev); - break; - } - spin_unlock_irqrestore(ap->lock, flags); - scsi_rescan_device(sdev); + ret = scsi_rescan_device(sdev); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); + + if (ret) + goto unlock; } } +unlock: spin_unlock_irqrestore(ap->lock, flags); mutex_unlock(&ap->scsi_scan_mutex); - if (delay_rescan) + /* Reschedule with a delay if scsi_rescan_device() returned an error */ + if (ret) schedule_delayed_work(&ap->scsi_rescan_task, msecs_to_jiffies(5)); }

2 years

1
0
0 0

FAILED: patch "[PATCH] scsi: sd: Do not issue commands to suspended disks on" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 99398d2070ab03d13f90b758ad397e19a65fffb0 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100441-granddad-chewable-dad2@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 99398d2070ab03d13f90b758ad397e19a65fffb0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal <dlemoal(a)kernel.org> Date: Fri, 8 Sep 2023 17:03:15 +0900 Subject: [PATCH] scsi: sd: Do not issue commands to suspended disks on shutdown If an error occurs when resuming a host adapter before the devices attached to the adapter are resumed, the adapter low level driver may remove the scsi host, resulting in a call to sd_remove() for the disks of the host. This in turn results in a call to sd_shutdown() which will issue a synchronize cache command and a start stop unit command to spindown the disk. sd_shutdown() issues the commands only if the device is not already runtime suspended but does not check the power state for system-wide suspend/resume. That is, the commands may be issued with the device in a suspended state, which causes PM resume to hang, forcing a reset of the machine to recover. Fix this by tracking the suspended state of a disk by introducing the suspended boolean field in the scsi_disk structure. This flag is set to true when the disk is suspended is sd_suspend_common() and resumed with sd_resume(). When suspended is true, sd_shutdown() is not executed from sd_remove(). Cc: stable(a)vger.kernel.org Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org> Reviewed-by: Hannes Reinecke <hare(a)suse.de> Reviewed-by: Bart Van Assche <bvanassche(a)acm.org> Reviewed-by: Martin K. Petersen <martin.petersen(a)oracle.com> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5a1b802d180f..83b6a3f3863b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3741,7 +3741,8 @@ static int sd_remove(struct device *dev) device_del(&sdkp->disk_dev); del_gendisk(sdkp->disk); - sd_shutdown(dev); + if (!sdkp->suspended) + sd_shutdown(dev); put_disk(sdkp->disk); return 0; @@ -3872,6 +3873,9 @@ static int sd_suspend_common(struct device *dev, bool runtime) ret = 0; } + if (!ret) + sdkp->suspended = true; + return ret; } @@ -3891,21 +3895,26 @@ static int sd_suspend_runtime(struct device *dev) static int sd_resume(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - int ret; + int ret = 0; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; - if (!sd_do_start_stop(sdkp->device, runtime)) + if (!sd_do_start_stop(sdkp->device, runtime)) { + sdkp->suspended = false; return 0; + } if (!sdkp->device->no_start_on_resume) { sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); } - if (!ret) + if (!ret) { opal_unlock_from_suspend(sdkp->opal_dev); + sdkp->suspended = false; + } + return ret; } diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 5eea762f84d1..409dda5350d1 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -131,6 +131,7 @@ struct scsi_disk { u8 provisioning_mode; u8 zeroing_mode; u8 nr_actuators; /* Number of actuators */ + bool suspended; /* Disk is suspended (stopped) */ unsigned ATO : 1; /* state of disk ATO bit */ unsigned cache_override : 1; /* temp override of WCE,RCD */ unsigned WCE : 1; /* state of disk WCE bit */

2 years

1
0
0 0

FAILED: patch "[PATCH] scsi: sd: Do not issue commands to suspended disks on" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 99398d2070ab03d13f90b758ad397e19a65fffb0 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100439-blighted-enjoyment-32b2@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 99398d2070ab03d13f90b758ad397e19a65fffb0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal <dlemoal(a)kernel.org> Date: Fri, 8 Sep 2023 17:03:15 +0900 Subject: [PATCH] scsi: sd: Do not issue commands to suspended disks on shutdown If an error occurs when resuming a host adapter before the devices attached to the adapter are resumed, the adapter low level driver may remove the scsi host, resulting in a call to sd_remove() for the disks of the host. This in turn results in a call to sd_shutdown() which will issue a synchronize cache command and a start stop unit command to spindown the disk. sd_shutdown() issues the commands only if the device is not already runtime suspended but does not check the power state for system-wide suspend/resume. That is, the commands may be issued with the device in a suspended state, which causes PM resume to hang, forcing a reset of the machine to recover. Fix this by tracking the suspended state of a disk by introducing the suspended boolean field in the scsi_disk structure. This flag is set to true when the disk is suspended is sd_suspend_common() and resumed with sd_resume(). When suspended is true, sd_shutdown() is not executed from sd_remove(). Cc: stable(a)vger.kernel.org Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org> Reviewed-by: Hannes Reinecke <hare(a)suse.de> Reviewed-by: Bart Van Assche <bvanassche(a)acm.org> Reviewed-by: Martin K. Petersen <martin.petersen(a)oracle.com> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5a1b802d180f..83b6a3f3863b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3741,7 +3741,8 @@ static int sd_remove(struct device *dev) device_del(&sdkp->disk_dev); del_gendisk(sdkp->disk); - sd_shutdown(dev); + if (!sdkp->suspended) + sd_shutdown(dev); put_disk(sdkp->disk); return 0; @@ -3872,6 +3873,9 @@ static int sd_suspend_common(struct device *dev, bool runtime) ret = 0; } + if (!ret) + sdkp->suspended = true; + return ret; } @@ -3891,21 +3895,26 @@ static int sd_suspend_runtime(struct device *dev) static int sd_resume(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - int ret; + int ret = 0; if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; - if (!sd_do_start_stop(sdkp->device, runtime)) + if (!sd_do_start_stop(sdkp->device, runtime)) { + sdkp->suspended = false; return 0; + } if (!sdkp->device->no_start_on_resume) { sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); } - if (!ret) + if (!ret) { opal_unlock_from_suspend(sdkp->opal_dev); + sdkp->suspended = false; + } + return ret; } diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 5eea762f84d1..409dda5350d1 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -131,6 +131,7 @@ struct scsi_disk { u8 provisioning_mode; u8 zeroing_mode; u8 nr_actuators; /* Number of actuators */ + bool suspended; /* Disk is suspended (stopped) */ unsigned ATO : 1; /* state of disk ATO bit */ unsigned cache_override : 1; /* temp override of WCE,RCD */ unsigned WCE : 1; /* state of disk WCE bit */

2 years

1
0
0 0

FAILED: patch "[PATCH] mptcp: fix dangling connection hang-up" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 27e5ccc2d5a50ed61bb73153edb1066104b108b3 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100415-word-epidural-cdee@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 27e5ccc2d5a50ed61bb73153edb1066104b108b3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Sat, 16 Sep 2023 12:52:49 +0200 Subject: [PATCH] mptcp: fix dangling connection hang-up According to RFC 8684 section 3.3: A connection is not closed unless [...] or an implementation-specific connection-level send timeout. Currently the MPTCP protocol does not implement such timeout, and connection timing-out at the TCP-level never move to close state. Introduces a catch-up condition at subflow close time to move the MPTCP socket to close, too. That additionally allows removing similar existing inside the worker. Finally, allow some additional timeout for plain ESTABLISHED mptcp sockets, as the protocol allows creating new subflows even at that point and making the connection functional again. This issue is actually present since the beginning, but it is basically impossible to solve without a long chain of functional pre-requisites topped by commit bbd49d114d57 ("mptcp: consolidate transition to TCP_CLOSE in mptcp_do_fastclose()"). When backporting this current patch, please also backport this other commit as well. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/430 Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close") Cc: stable(a)vger.kernel.org Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Signed-off-by: David S. Miller <davem(a)davemloft.net> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c8f38f303a90..e252539b1e19 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -892,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); + mptcp_stop_tout_timer(sk); return true; } @@ -2369,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, bool dispose_it, need_push = false; /* If the first subflow moved to a close state before accept, e.g. due - * to an incoming reset, mptcp either: - * - if either the subflow or the msk are dead, destroy the context - * (the subflow socket is deleted by inet_child_forget) and the msk - * - otherwise do nothing at the moment and take action at accept and/or - * listener shutdown - user-space must be able to accept() the closed - * socket. + * to an incoming reset or listener shutdown, the subflow socket is + * already deleted by inet_child_forget() and the mptcp socket can't + * survive too. */ - if (msk->in_accept_queue && msk->first == ssk) { - if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD)) - return; - + if (msk->in_accept_queue && msk->first == ssk && + (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) { /* ensure later check in mptcp_worker() will dispose the msk */ + mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1)); sock_set_flag(sk, SOCK_DEAD); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); mptcp_subflow_drop_ctx(ssk); @@ -2443,6 +2440,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, out: if (need_push) __mptcp_push_pending(sk, 0); + + /* Catch every 'all subflows closed' scenario, including peers silently + * closing them, e.g. due to timeout. + * For established sockets, allow an additional timeout before closing, + * as the protocol can still create more subflows. + */ + if (list_is_singular(&msk->conn_list) && msk->first && + inet_sk_state_load(msk->first) == TCP_CLOSE) { + if (sk->sk_state != TCP_ESTABLISHED || + msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_close_wake_up(sk); + } else { + mptcp_start_tout_timer(sk); + } + } } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2486,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk) } -static bool mptcp_should_close(const struct sock *sk) +static bool mptcp_close_tout_expired(const struct sock *sk) { - s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; - struct mptcp_subflow_context *subflow; + if (!inet_csk(sk)->icsk_mtup.probe_timestamp || + sk->sk_state == TCP_CLOSE) + return false; - if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue) - return true; - - /* if all subflows are in closed status don't bother with additional - * timeout - */ - mptcp_for_each_subflow(mptcp_sk(sk), subflow) { - if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) != - TCP_CLOSE) - return false; - } - return true; + return time_after32(tcp_jiffies32, + inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN); } static void mptcp_check_fastclose(struct mptcp_sock *msk) @@ -2641,15 +2645,16 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) struct sock *sk = (struct sock *)msk; unsigned long timeout, close_timeout; - if (!fail_tout && !sock_flag(sk, SOCK_DEAD)) + if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp) return; - close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN; + close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + + TCP_TIMEWAIT_LEN; /* the close timeout takes precedence on the fail one, and here at least one of * them is active */ - timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout; + timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout; sk_reset_timer(sk, &sk->sk_timer, timeout); } @@ -2668,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) mptcp_subflow_reset(ssk); WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); unlock_sock_fast(ssk, slow); - - mptcp_reset_tout_timer(msk, 0); } static void mptcp_do_fastclose(struct sock *sk) @@ -2706,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(sk); - /* There is no point in keeping around an orphaned sk timedout or - * closed, but we need the msk around to reply to incoming DATA_FIN, - * even if it is orphaned and in FIN_WAIT2 state - */ - if (sock_flag(sk, SOCK_DEAD)) { - if (mptcp_should_close(sk)) - mptcp_do_fastclose(sk); + if (mptcp_close_tout_expired(sk)) { + mptcp_do_fastclose(sk); + mptcp_close_wake_up(sk); + } - if (sk->sk_state == TCP_CLOSE) { - __mptcp_destroy_sock(sk); - goto unlock; - } + if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) { + __mptcp_destroy_sock(sk); + goto unlock; } if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) @@ -3016,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout) cleanup: /* orphan all the subflows */ - inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast_nested(ssk); @@ -3053,7 +3051,7 @@ bool __mptcp_close(struct sock *sk, long timeout) __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - mptcp_reset_tout_timer(msk, 0); + mptcp_start_tout_timer(sk); } return do_cancel_work; @@ -3117,7 +3115,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_rtx_timer(sk); - sk_stop_timer(sk, &sk->sk_timer); + mptcp_stop_tout_timer(sk); if (msk->token) mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 5e2026815c8e..ed61d6850cce 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -719,6 +719,28 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); + +static inline void mptcp_stop_tout_timer(struct sock *sk) +{ + if (!inet_csk(sk)->icsk_mtup.probe_timestamp) + return; + + sk_stop_timer(sk, &sk->sk_timer); + inet_csk(sk)->icsk_mtup.probe_timestamp = 0; +} + +static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout) +{ + /* avoid 0 timestamp, as that means no close timeout */ + inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1; +} + +static inline void mptcp_start_tout_timer(struct sock *sk) +{ + mptcp_set_close_tout(sk, tcp_jiffies32); + mptcp_reset_tout_timer(mptcp_sk(sk), 0); +} + static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 433f290984c8..918c1a235790 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1552,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); WRITE_ONCE(msk->allow_infinite_fallback, false); + mptcp_stop_tout_timer(sk); return 0; failed_unlink:

2 years

1
0
0 0

FAILED: patch "[PATCH] mptcp: fix dangling connection hang-up" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 27e5ccc2d5a50ed61bb73153edb1066104b108b3 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100414-reverse-yippee-3d2e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 27e5ccc2d5a50ed61bb73153edb1066104b108b3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Sat, 16 Sep 2023 12:52:49 +0200 Subject: [PATCH] mptcp: fix dangling connection hang-up According to RFC 8684 section 3.3: A connection is not closed unless [...] or an implementation-specific connection-level send timeout. Currently the MPTCP protocol does not implement such timeout, and connection timing-out at the TCP-level never move to close state. Introduces a catch-up condition at subflow close time to move the MPTCP socket to close, too. That additionally allows removing similar existing inside the worker. Finally, allow some additional timeout for plain ESTABLISHED mptcp sockets, as the protocol allows creating new subflows even at that point and making the connection functional again. This issue is actually present since the beginning, but it is basically impossible to solve without a long chain of functional pre-requisites topped by commit bbd49d114d57 ("mptcp: consolidate transition to TCP_CLOSE in mptcp_do_fastclose()"). When backporting this current patch, please also backport this other commit as well. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/430 Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close") Cc: stable(a)vger.kernel.org Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Signed-off-by: David S. Miller <davem(a)davemloft.net> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c8f38f303a90..e252539b1e19 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -892,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); mptcp_subflow_joined(msk, ssk); + mptcp_stop_tout_timer(sk); return true; } @@ -2369,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, bool dispose_it, need_push = false; /* If the first subflow moved to a close state before accept, e.g. due - * to an incoming reset, mptcp either: - * - if either the subflow or the msk are dead, destroy the context - * (the subflow socket is deleted by inet_child_forget) and the msk - * - otherwise do nothing at the moment and take action at accept and/or - * listener shutdown - user-space must be able to accept() the closed - * socket. + * to an incoming reset or listener shutdown, the subflow socket is + * already deleted by inet_child_forget() and the mptcp socket can't + * survive too. */ - if (msk->in_accept_queue && msk->first == ssk) { - if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD)) - return; - + if (msk->in_accept_queue && msk->first == ssk && + (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) { /* ensure later check in mptcp_worker() will dispose the msk */ + mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1)); sock_set_flag(sk, SOCK_DEAD); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); mptcp_subflow_drop_ctx(ssk); @@ -2443,6 +2440,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, out: if (need_push) __mptcp_push_pending(sk, 0); + + /* Catch every 'all subflows closed' scenario, including peers silently + * closing them, e.g. due to timeout. + * For established sockets, allow an additional timeout before closing, + * as the protocol can still create more subflows. + */ + if (list_is_singular(&msk->conn_list) && msk->first && + inet_sk_state_load(msk->first) == TCP_CLOSE) { + if (sk->sk_state != TCP_ESTABLISHED || + msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_close_wake_up(sk); + } else { + mptcp_start_tout_timer(sk); + } + } } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2486,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk) } -static bool mptcp_should_close(const struct sock *sk) +static bool mptcp_close_tout_expired(const struct sock *sk) { - s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp; - struct mptcp_subflow_context *subflow; + if (!inet_csk(sk)->icsk_mtup.probe_timestamp || + sk->sk_state == TCP_CLOSE) + return false; - if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue) - return true; - - /* if all subflows are in closed status don't bother with additional - * timeout - */ - mptcp_for_each_subflow(mptcp_sk(sk), subflow) { - if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) != - TCP_CLOSE) - return false; - } - return true; + return time_after32(tcp_jiffies32, + inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN); } static void mptcp_check_fastclose(struct mptcp_sock *msk) @@ -2641,15 +2645,16 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) struct sock *sk = (struct sock *)msk; unsigned long timeout, close_timeout; - if (!fail_tout && !sock_flag(sk, SOCK_DEAD)) + if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp) return; - close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN; + close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + + TCP_TIMEWAIT_LEN; /* the close timeout takes precedence on the fail one, and here at least one of * them is active */ - timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout; + timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout; sk_reset_timer(sk, &sk->sk_timer, timeout); } @@ -2668,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) mptcp_subflow_reset(ssk); WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); unlock_sock_fast(ssk, slow); - - mptcp_reset_tout_timer(msk, 0); } static void mptcp_do_fastclose(struct sock *sk) @@ -2706,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(sk); - /* There is no point in keeping around an orphaned sk timedout or - * closed, but we need the msk around to reply to incoming DATA_FIN, - * even if it is orphaned and in FIN_WAIT2 state - */ - if (sock_flag(sk, SOCK_DEAD)) { - if (mptcp_should_close(sk)) - mptcp_do_fastclose(sk); + if (mptcp_close_tout_expired(sk)) { + mptcp_do_fastclose(sk); + mptcp_close_wake_up(sk); + } - if (sk->sk_state == TCP_CLOSE) { - __mptcp_destroy_sock(sk); - goto unlock; - } + if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) { + __mptcp_destroy_sock(sk); + goto unlock; } if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) @@ -3016,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout) cleanup: /* orphan all the subflows */ - inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast_nested(ssk); @@ -3053,7 +3051,7 @@ bool __mptcp_close(struct sock *sk, long timeout) __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - mptcp_reset_tout_timer(msk, 0); + mptcp_start_tout_timer(sk); } return do_cancel_work; @@ -3117,7 +3115,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_rtx_timer(sk); - sk_stop_timer(sk, &sk->sk_timer); + mptcp_stop_tout_timer(sk); if (msk->token) mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 5e2026815c8e..ed61d6850cce 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -719,6 +719,28 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); + +static inline void mptcp_stop_tout_timer(struct sock *sk) +{ + if (!inet_csk(sk)->icsk_mtup.probe_timestamp) + return; + + sk_stop_timer(sk, &sk->sk_timer); + inet_csk(sk)->icsk_mtup.probe_timestamp = 0; +} + +static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout) +{ + /* avoid 0 timestamp, as that means no close timeout */ + inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1; +} + +static inline void mptcp_start_tout_timer(struct sock *sk) +{ + mptcp_set_close_tout(sk, tcp_jiffies32); + mptcp_reset_tout_timer(mptcp_sk(sk), 0); +} + static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 433f290984c8..918c1a235790 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1552,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); WRITE_ONCE(msk->allow_infinite_fallback, false); + mptcp_stop_tout_timer(sk); return 0; failed_unlink:

2 years

1
0
0 0

FAILED: patch "[PATCH] mptcp: rename timer related helper to less confusing names" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x f6909dc1c1f4452879278128012da6c76bc186a5 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100401-cultural-radiator-bc5e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From f6909dc1c1f4452879278128012da6c76bc186a5 Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Sat, 16 Sep 2023 12:52:48 +0200 Subject: [PATCH] mptcp: rename timer related helper to less confusing names The msk socket uses to different timeout to track close related events and retransmissions. The existing helpers do not indicate clearly which timer they actually touch, making the related code quite confusing. Change the existing helpers name to avoid such confusion. No functional change intended. This patch is linked to the next one ("mptcp: fix dangling connection hang-up"). The two patches are supposed to be backported together. Cc: stable(a)vger.kernel.org # v5.11+ Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Signed-off-by: David S. Miller <davem(a)davemloft.net> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1c96b8da71df..c8f38f303a90 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -405,7 +405,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, return false; } -static void mptcp_stop_timer(struct sock *sk) +static void mptcp_stop_rtx_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -911,12 +911,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list } } -static bool mptcp_timer_pending(struct sock *sk) +static bool mptcp_rtx_timer_pending(struct sock *sk) { return timer_pending(&inet_csk(sk)->icsk_retransmit_timer); } -static void mptcp_reset_timer(struct sock *sk) +static void mptcp_reset_rtx_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); unsigned long tout; @@ -1050,10 +1050,10 @@ static void __mptcp_clean_una(struct sock *sk) out: if (snd_una == READ_ONCE(msk->snd_nxt) && snd_una == READ_ONCE(msk->write_seq)) { - if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) - mptcp_stop_timer(sk); + if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) + mptcp_stop_rtx_timer(sk); } else { - mptcp_reset_timer(sk); + mptcp_reset_rtx_timer(sk); } } @@ -1626,8 +1626,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) mptcp_push_release(ssk, &info); /* ensure the rtx timer is running */ - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); if (do_check_data_fin) mptcp_check_send_data_fin(sk); } @@ -1690,8 +1690,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool if (copied) { tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); if (msk->snd_data_fin_enable && msk->snd_nxt + 1 == msk->write_seq) @@ -2260,7 +2260,7 @@ static void mptcp_retransmit_timer(struct timer_list *t) sock_put(sk); } -static void mptcp_timeout_timer(struct timer_list *t) +static void mptcp_tout_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); @@ -2629,14 +2629,14 @@ static void __mptcp_retrans(struct sock *sk) reset_timer: mptcp_check_and_set_pending(sk); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); } /* schedule the timeout timer for the relevant event: either close timeout * or mp_fail timeout. The close timeout takes precedence on the mp_fail one */ -void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout) +void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) { struct sock *sk = (struct sock *)msk; unsigned long timeout, close_timeout; @@ -2669,7 +2669,7 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); unlock_sock_fast(ssk, slow); - mptcp_reset_timeout(msk, 0); + mptcp_reset_tout_timer(msk, 0); } static void mptcp_do_fastclose(struct sock *sk) @@ -2758,7 +2758,7 @@ static void __mptcp_init_sock(struct sock *sk) /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); - timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); + timer_setup(&sk->sk_timer, mptcp_tout_timer, 0); } static void mptcp_ca_reset(struct sock *sk) @@ -2849,8 +2849,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); tcp_send_ack(ssk); - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (!mptcp_rtx_timer_pending(sk)) + mptcp_reset_rtx_timer(sk); } break; } @@ -2933,7 +2933,7 @@ static void __mptcp_destroy_sock(struct sock *sk) might_sleep(); - mptcp_stop_timer(sk); + mptcp_stop_rtx_timer(sk); sk_stop_timer(sk, &sk->sk_timer); msk->pm.status = 0; mptcp_release_sched(msk); @@ -3053,7 +3053,7 @@ bool __mptcp_close(struct sock *sk, long timeout) __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - mptcp_reset_timeout(msk, 0); + mptcp_reset_tout_timer(msk, 0); } return do_cancel_work; @@ -3116,7 +3116,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) mptcp_check_listen_stop(sk); inet_sk_state_store(sk, TCP_CLOSE); - mptcp_stop_timer(sk); + mptcp_stop_rtx_timer(sk); sk_stop_timer(sk, &sk->sk_timer); if (msk->token) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7254b3562575..5e2026815c8e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -718,7 +718,7 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); -void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout); +void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout); static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 2f40c23fdb0d..433f290984c8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1226,7 +1226,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) WRITE_ONCE(subflow->fail_tout, fail_tout); tcp_send_ack(ssk); - mptcp_reset_timeout(msk, subflow->fail_tout); + mptcp_reset_tout_timer(msk, subflow->fail_tout); } static bool subflow_check_data_avail(struct sock *ssk)

2 years

1
0
0 0

FAILED: patch "[PATCH] mptcp: process pending subflow error on close" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 9f1a98813b4b686482e5ef3c9d998581cace0ba6 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100446-wasp-granny-2378@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 9f1a98813b4b686482e5ef3c9d998581cace0ba6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Sat, 16 Sep 2023 12:52:47 +0200 Subject: [PATCH] mptcp: process pending subflow error on close On incoming TCP reset, subflow closing could happen before error propagation. That in turn could cause the socket error being ignored, and a missing socket state transition, as reported by Daire-Byrne. Address the issues explicitly checking for subflow socket error at close time. To avoid code duplication, factor-out of __mptcp_error_report() a new helper implementing the relevant bits. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/429 Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk") Cc: stable(a)vger.kernel.org Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Signed-off-by: David S. Miller <davem(a)davemloft.net> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 915860027b1a..1c96b8da71df 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -770,40 +770,44 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) return moved; } +static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk) +{ + int err = sock_error(ssk); + int ssk_state; + + if (!err) + return false; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk))) + return false; + + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); + WRITE_ONCE(sk->sk_err, -err); + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk_error_report(sk); + return true; +} + void __mptcp_error_report(struct sock *sk) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; - - if (!err) - continue; - - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; - - /* We need to propagate only transition to CLOSE state. - * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. - */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - WRITE_ONCE(sk->sk_err, -err); - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } + mptcp_for_each_subflow(msk, subflow) + if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow))) + break; } /* In most cases we will be able to lock the mptcp socket. If its already @@ -2428,6 +2432,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, } out_release: + __mptcp_subflow_error_report(sk, ssk); release_sock(ssk); sock_put(ssk);

2 years

1
0
0 0

FAILED: patch "[PATCH] mptcp: move __mptcp_error_report in protocol.c" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x d5fbeff1ab812b6c473b6924bee8748469462e2c # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100423-modify-dreadlock-ee1f@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From d5fbeff1ab812b6c473b6924bee8748469462e2c Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Sat, 16 Sep 2023 12:52:46 +0200 Subject: [PATCH] mptcp: move __mptcp_error_report in protocol.c This will simplify the next patch ("mptcp: process pending subflow error on close"). No functional change intended. Cc: stable(a)vger.kernel.org # v5.12+ Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Signed-off-by: David S. Miller <davem(a)davemloft.net> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a7fc16f5175d..915860027b1a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -770,6 +770,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) return moved; } +void __mptcp_error_report(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err = sock_error(ssk); + int ssk_state; + + if (!err) + continue; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) + continue; + + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); + WRITE_ONCE(sk->sk_err, -err); + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk_error_report(sk); + break; + } +} + /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9bf3c7bc1762..2f40c23fdb0d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -void __mptcp_error_report(struct sock *sk) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; - - if (!err) - continue; - - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; - - /* We need to propagate only transition to CLOSE state. - * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. - */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - WRITE_ONCE(sk->sk_err, -err); - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } -} - static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn;

2 years

1
0
0 0

FAILED: patch "[PATCH] mptcp: move __mptcp_error_report in protocol.c" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x d5fbeff1ab812b6c473b6924bee8748469462e2c # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100421-earthly-entail-0945@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From d5fbeff1ab812b6c473b6924bee8748469462e2c Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Sat, 16 Sep 2023 12:52:46 +0200 Subject: [PATCH] mptcp: move __mptcp_error_report in protocol.c This will simplify the next patch ("mptcp: process pending subflow error on close"). No functional change intended. Cc: stable(a)vger.kernel.org # v5.12+ Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Signed-off-by: David S. Miller <davem(a)davemloft.net> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a7fc16f5175d..915860027b1a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -770,6 +770,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) return moved; } +void __mptcp_error_report(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err = sock_error(ssk); + int ssk_state; + + if (!err) + continue; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) + continue; + + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); + WRITE_ONCE(sk->sk_err, -err); + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk_error_report(sk); + break; + } +} + /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9bf3c7bc1762..2f40c23fdb0d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -void __mptcp_error_report(struct sock *sk) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; - - if (!err) - continue; - - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; - - /* We need to propagate only transition to CLOSE state. - * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. - */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - WRITE_ONCE(sk->sk_err, -err); - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } -} - static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn;

2 years

1
0
0 0

FAILED: patch "[PATCH] mptcp: move __mptcp_error_report in protocol.c" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x d5fbeff1ab812b6c473b6924bee8748469462e2c # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100422-unnerve-zodiac-03e0@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From d5fbeff1ab812b6c473b6924bee8748469462e2c Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Sat, 16 Sep 2023 12:52:46 +0200 Subject: [PATCH] mptcp: move __mptcp_error_report in protocol.c This will simplify the next patch ("mptcp: process pending subflow error on close"). No functional change intended. Cc: stable(a)vger.kernel.org # v5.12+ Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Signed-off-by: David S. Miller <davem(a)davemloft.net> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a7fc16f5175d..915860027b1a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -770,6 +770,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) return moved; } +void __mptcp_error_report(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err = sock_error(ssk); + int ssk_state; + + if (!err) + continue; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) + continue; + + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); + WRITE_ONCE(sk->sk_err, -err); + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk_error_report(sk); + break; + } +} + /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9bf3c7bc1762..2f40c23fdb0d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -void __mptcp_error_report(struct sock *sk) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; - - if (!err) - continue; - - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; - - /* We need to propagate only transition to CLOSE state. - * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. - */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - WRITE_ONCE(sk->sk_err, -err); - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } -} - static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn;

2 years

1
0
0 0

FAILED: patch "[PATCH] mptcp: move __mptcp_error_report in protocol.c" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x d5fbeff1ab812b6c473b6924bee8748469462e2c # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100420-spirits-clumsy-63d9@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From d5fbeff1ab812b6c473b6924bee8748469462e2c Mon Sep 17 00:00:00 2001 From: Paolo Abeni <pabeni(a)redhat.com> Date: Sat, 16 Sep 2023 12:52:46 +0200 Subject: [PATCH] mptcp: move __mptcp_error_report in protocol.c This will simplify the next patch ("mptcp: process pending subflow error on close"). No functional change intended. Cc: stable(a)vger.kernel.org # v5.12+ Signed-off-by: Paolo Abeni <pabeni(a)redhat.com> Reviewed-by: Mat Martineau <martineau(a)kernel.org> Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net> Signed-off-by: David S. Miller <davem(a)davemloft.net> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a7fc16f5175d..915860027b1a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -770,6 +770,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk) return moved; } +void __mptcp_error_report(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + int err = sock_error(ssk); + int ssk_state; + + if (!err) + continue; + + /* only propagate errors on fallen-back sockets or + * on MPC connect + */ + if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) + continue; + + /* We need to propagate only transition to CLOSE state. + * Orphaned socket will see such state change via + * subflow_sched_work_if_closed() and that path will properly + * destroy the msk as needed. + */ + ssk_state = inet_sk_state_load(ssk); + if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) + inet_sk_state_store(sk, ssk_state); + WRITE_ONCE(sk->sk_err, -err); + + /* This barrier is coupled with smp_rmb() in mptcp_poll() */ + smp_wmb(); + sk_error_report(sk); + break; + } +} + /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9bf3c7bc1762..2f40c23fdb0d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -void __mptcp_error_report(struct sock *sk) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; - - if (!err) - continue; - - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; - - /* We need to propagate only transition to CLOSE state. - * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. - */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - WRITE_ONCE(sk->sk_err, -err); - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } -} - static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn;

2 years

1
0
0 0

FAILED: patch "[PATCH] KVM: x86/mmu: Stop zapping invalidated TDP MMU roots" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 0df9dab891ff0d9b646d82e4fe038229e4c02451 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100425-avoid-alkaline-3fd9@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 0df9dab891ff0d9b646d82e4fe038229e4c02451 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc(a)google.com> Date: Fri, 15 Sep 2023 17:39:15 -0700 Subject: [PATCH] KVM: x86/mmu: Stop zapping invalidated TDP MMU roots asynchronously Stop zapping invalidate TDP MMU roots via work queue now that KVM preserves TDP MMU roots until they are explicitly invalidated. Zapping roots asynchronously was effectively a workaround to avoid stalling a vCPU for an extended during if a vCPU unloaded a root, which at the time happened whenever the guest toggled CR0.WP (a frequent operation for some guest kernels). While a clever hack, zapping roots via an unbound worker had subtle, unintended consequences on host scheduling, especially when zapping multiple roots, e.g. as part of a memslot. Because the work of zapping a root is no longer bound to the task that initiated the zap, things like the CPU affinity and priority of the original task get lost. Losing the affinity and priority can be especially problematic if unbound workqueues aren't affined to a small number of CPUs, as zapping multiple roots can cause KVM to heavily utilize the majority of CPUs in the system, *beyond* the CPUs KVM is already using to run vCPUs. When deleting a memslot via KVM_SET_USER_MEMORY_REGION, the async root zap can result in KVM occupying all logical CPUs for ~8ms, and result in high priority tasks not being scheduled in in a timely manner. In v5.15, which doesn't preserve unloaded roots, the issues were even more noticeable as KVM would zap roots more frequently and could occupy all CPUs for 50ms+. Consuming all CPUs for an extended duration can lead to significant jitter throughout the system, e.g. on ChromeOS with virtio-gpu, deleting memslots is a semi-frequent operation as memslots are deleted and recreated with different host virtual addresses to react to host GPU drivers allocating and freeing GPU blobs. On ChromeOS, the jitter manifests as audio blips during games due to the audio server's tasks not getting scheduled in promptly, despite the tasks having a high realtime priority. Deleting memslots isn't exactly a fast path and should be avoided when possible, and ChromeOS is working towards utilizing MAP_FIXED to avoid the memslot shenanigans, but KVM is squarely in the wrong. Not to mention that removing the async zapping eliminates a non-trivial amount of complexity. Note, one of the subtle behaviors hidden behind the async zapping is that KVM would zap invalidated roots only once (ignoring partial zaps from things like mmu_notifier events). Preserve this behavior by adding a flag to identify roots that are scheduled to be zapped versus roots that have already been zapped but not yet freed. Add a comment calling out why kvm_tdp_mmu_invalidate_all_roots() can encounter invalid roots, as it's not at all obvious why zapping invalidated roots shouldn't simply zap all invalid roots. Reported-by: Pattara Teerapong <pteerapong(a)google.com> Cc: David Stevens <stevensd(a)google.com> Cc: Yiwei Zhang<zzyiwei(a)google.com> Cc: Paul Hsia <paulhsia(a)google.com> Cc: stable(a)vger.kernel.org Signed-off-by: Sean Christopherson <seanjc(a)google.com> Message-Id: <20230916003916.2545000-4-seanjc(a)google.com> Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1a4def36d5bb..17715cb8731d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1419,7 +1419,6 @@ struct kvm_arch { * the thread holds the MMU lock in write mode. */ spinlock_t tdp_mmu_pages_lock; - struct workqueue_struct *tdp_mmu_zap_wq; #endif /* CONFIG_X86_64 */ /* @@ -1835,7 +1834,7 @@ void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -int kvm_mmu_init_vm(struct kvm *kvm); +void kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 54f94f644b42..f7901cb4d2fa 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -6167,20 +6167,15 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); } -int kvm_mmu_init_vm(struct kvm *kvm) +void kvm_mmu_init_vm(struct kvm *kvm) { - int r; - INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); - if (tdp_mmu_enabled) { - r = kvm_mmu_init_tdp_mmu(kvm); - if (r < 0) - return r; - } + if (tdp_mmu_enabled) + kvm_mmu_init_tdp_mmu(kvm); kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache; kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO; @@ -6189,8 +6184,6 @@ int kvm_mmu_init_vm(struct kvm *kvm) kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache; kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO; - - return 0; } static void mmu_free_vm_memory_caches(struct kvm *kvm) diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index b102014e2c60..decc1f153669 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -58,7 +58,12 @@ struct kvm_mmu_page { bool tdp_mmu_page; bool unsync; - u8 mmu_valid_gen; + union { + u8 mmu_valid_gen; + + /* Only accessed under slots_lock. */ + bool tdp_mmu_scheduled_root_to_zap; + }; /* * The shadow page can't be replaced by an equivalent huge page @@ -100,13 +105,7 @@ struct kvm_mmu_page { struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ tdp_ptep_t ptep; }; - union { - DECLARE_BITMAP(unsync_child_bitmap, 512); - struct { - struct work_struct tdp_mmu_async_work; - void *tdp_mmu_async_data; - }; - }; + DECLARE_BITMAP(unsync_child_bitmap, 512); /* * Tracks shadow pages that, if zapped, would allow KVM to create an NX diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index aa90901d2871..6cd4dd631a2f 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -12,18 +12,10 @@ #include <trace/events/kvm.h> /* Initializes the TDP MMU for the VM, if enabled. */ -int kvm_mmu_init_tdp_mmu(struct kvm *kvm) +void kvm_mmu_init_tdp_mmu(struct kvm *kvm) { - struct workqueue_struct *wq; - - wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0); - if (!wq) - return -ENOMEM; - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); - kvm->arch.tdp_mmu_zap_wq = wq; - return 1; } /* Arbitrarily returns true so that this may be used in if statements. */ @@ -46,20 +38,15 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) * ultimately frees all roots. */ kvm_tdp_mmu_invalidate_all_roots(kvm); - - /* - * Destroying a workqueue also first flushes the workqueue, i.e. no - * need to invoke kvm_tdp_mmu_zap_invalidated_roots(). - */ - destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); + kvm_tdp_mmu_zap_invalidated_roots(kvm); WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* * Ensure that all the outstanding RCU callbacks to free shadow pages - * can run before the VM is torn down. Work items on tdp_mmu_zap_wq - * can call kvm_tdp_mmu_put_root and create new callbacks. + * can run before the VM is torn down. Putting the last reference to + * zapped roots will create new callbacks. */ rcu_barrier(); } @@ -86,46 +73,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) tdp_mmu_free_sp(sp); } -static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root, - bool shared); - -static void tdp_mmu_zap_root_work(struct work_struct *work) -{ - struct kvm_mmu_page *root = container_of(work, struct kvm_mmu_page, - tdp_mmu_async_work); - struct kvm *kvm = root->tdp_mmu_async_data; - - read_lock(&kvm->mmu_lock); - - /* - * A TLB flush is not necessary as KVM performs a local TLB flush when - * allocating a new root (see kvm_mmu_load()), and when migrating vCPU - * to a different pCPU. Note, the local TLB flush on reuse also - * invalidates any paging-structure-cache entries, i.e. TLB entries for - * intermediate paging structures, that may be zapped, as such entries - * are associated with the ASID on both VMX and SVM. - */ - tdp_mmu_zap_root(kvm, root, true); - - /* - * Drop the refcount using kvm_tdp_mmu_put_root() to test its logic for - * avoiding an infinite loop. By design, the root is reachable while - * it's being asynchronously zapped, thus a different task can put its - * last reference, i.e. flowing through kvm_tdp_mmu_put_root() for an - * asynchronously zapped root is unavoidable. - */ - kvm_tdp_mmu_put_root(kvm, root, true); - - read_unlock(&kvm->mmu_lock); -} - -static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root) -{ - root->tdp_mmu_async_data = kvm; - INIT_WORK(&root->tdp_mmu_async_work, tdp_mmu_zap_root_work); - queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work); -} - void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared) { @@ -211,11 +158,11 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, #define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \ __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true) -#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \ - for (_root = tdp_mmu_next_root(_kvm, NULL, false, false); \ +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared) \ + for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false); \ _root; \ - _root = tdp_mmu_next_root(_kvm, _root, false, false)) \ - if (!kvm_lockdep_assert_mmu_lock_held(_kvm, false)) { \ + _root = tdp_mmu_next_root(_kvm, _root, _shared, false)) \ + if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) { \ } else /* @@ -296,7 +243,7 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu) * by a memslot update or by the destruction of the VM. Initialize the * refcount to two; one reference for the vCPU, and one reference for * the TDP MMU itself, which is held until the root is invalidated and - * is ultimately put by tdp_mmu_zap_root_work(). + * is ultimately put by kvm_tdp_mmu_zap_invalidated_roots(). */ refcount_set(&root->tdp_mmu_root_count, 2); @@ -885,7 +832,7 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush) { struct kvm_mmu_page *root; - for_each_tdp_mmu_root_yield_safe(kvm, root) + for_each_tdp_mmu_root_yield_safe(kvm, root, false) flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush); return flush; @@ -907,7 +854,7 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) * is being destroyed or the userspace VMM has exited. In both cases, * KVM_RUN is unreachable, i.e. no vCPUs will ever service the request. */ - for_each_tdp_mmu_root_yield_safe(kvm, root) + for_each_tdp_mmu_root_yield_safe(kvm, root, false) tdp_mmu_zap_root(kvm, root, false); } @@ -917,18 +864,47 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm) */ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) { - flush_workqueue(kvm->arch.tdp_mmu_zap_wq); + struct kvm_mmu_page *root; + + read_lock(&kvm->mmu_lock); + + for_each_tdp_mmu_root_yield_safe(kvm, root, true) { + if (!root->tdp_mmu_scheduled_root_to_zap) + continue; + + root->tdp_mmu_scheduled_root_to_zap = false; + KVM_BUG_ON(!root->role.invalid, kvm); + + /* + * A TLB flush is not necessary as KVM performs a local TLB + * flush when allocating a new root (see kvm_mmu_load()), and + * when migrating a vCPU to a different pCPU. Note, the local + * TLB flush on reuse also invalidates paging-structure-cache + * entries, i.e. TLB entries for intermediate paging structures, + * that may be zapped, as such entries are associated with the + * ASID on both VMX and SVM. + */ + tdp_mmu_zap_root(kvm, root, true); + + /* + * The referenced needs to be put *after* zapping the root, as + * the root must be reachable by mmu_notifiers while it's being + * zapped + */ + kvm_tdp_mmu_put_root(kvm, root, true); + } + + read_unlock(&kvm->mmu_lock); } /* * Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that * is about to be zapped, e.g. in response to a memslots update. The actual - * zapping is performed asynchronously. Using a separate workqueue makes it - * easy to ensure that the destruction is performed before the "fast zap" - * completes, without keeping a separate list of invalidated roots; the list is - * effectively the list of work items in the workqueue. + * zapping is done separately so that it happens with mmu_lock with read, + * whereas invalidating roots must be done with mmu_lock held for write (unless + * the VM is being destroyed). * - * Note, the asynchronous worker is gifted the TDP MMU's reference. + * Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference. * See kvm_tdp_mmu_get_vcpu_root_hpa(). */ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) @@ -953,19 +929,20 @@ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) /* * As above, mmu_lock isn't held when destroying the VM! There can't * be other references to @kvm, i.e. nothing else can invalidate roots - * or be consuming roots, but walking the list of roots does need to be - * guarded against roots being deleted by the asynchronous zap worker. + * or get/put references to roots. */ - rcu_read_lock(); - - list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) { + list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { + /* + * Note, invalid roots can outlive a memslot update! Invalid + * roots must be *zapped* before the memslot update completes, + * but a different task can acquire a reference and keep the + * root alive after its been zapped. + */ if (!root->role.invalid) { + root->tdp_mmu_scheduled_root_to_zap = true; root->role.invalid = true; - tdp_mmu_schedule_zap_root(kvm, root); } } - - rcu_read_unlock(); } /* diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index bc088953f929..733a3aef3a96 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -7,7 +7,7 @@ #include "spte.h" -int kvm_mmu_init_tdp_mmu(struct kvm *kvm); +void kvm_mmu_init_tdp_mmu(struct kvm *kvm); void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm); hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c9c81e82e65..9f18b06bbda6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12308,9 +12308,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out; - ret = kvm_mmu_init_vm(kvm); - if (ret) - goto out_page_track; + kvm_mmu_init_vm(kvm); ret = static_call(kvm_x86_vm_init)(kvm); if (ret) @@ -12355,7 +12353,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) out_uninit_mmu: kvm_mmu_uninit_vm(kvm); -out_page_track: kvm_page_track_cleanup(kvm); out: return ret;

2 years

1
0
0 0

FAILED: patch "[PATCH] spi: zynqmp-gqspi: fix clock imbalance on probe failure" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y git checkout FETCH_HEAD git cherry-pick -x 1527b076ae2cb6a9c590a02725ed39399fcad1cf # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100421-tribute-plausible-7fed@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^.. Possible dependencies: 1527b076ae2c ("spi: zynqmp-gqspi: fix clock imbalance on probe failure") 3ffefa1d9c9e ("spi: zynqmp-gqspi: Convert to platform remove callback returning void") 58eaa7b2d07d ("spi: spi-zynqmp-gqspi: Fix runtime PM imbalance in zynqmp_qspi_probe") 1c26372e5aa9 ("spi: spi-zynqmp-gqspi: Update driver to use spi-mem framework") 91af6eb04a6b ("spi: spi-zynqmp-gqspi: Fix kernel-doc warnings") 4b42b0b49812 ("spi: spi-zynqmp-gqspi: Correct a couple of misspellings in kerneldoc") 4db8180ffe7c ("firmware: xilinx: Remove eemi ops for fpga related APIs") bc86f9c54616 ("firmware: xilinx: Remove eemi ops for aes engine") cbbbda71fe37 ("firmware: xilinx: Remove eemi ops for set_requirement") 07fb1a4619fc ("firmware: xilinx: Remove eemi ops for release_node") bf8b27ed2324 ("firmware: xilinx: Remove eemi ops for request_node") 951d0a97e41c ("firmware: xilinx: Remove eemi ops for set_suspend_mode") 9474da950d1e ("firmware: xilinx: Remove eemi ops for init_finalize") 1b413581fe26 ("firmware: xilinx: Remove eemi ops for reset_get_status") cf23ec353146 ("firmware: xilinx: Remove eemi ops for reset_assert") 426c8d85df7a ("firmware: xilinx: Use APIs instead of IOCTLs") 70c0d36462ca ("firmware: xilinx: Remove eemi ops for clock set/get parent") 7a1e10621a21 ("firmware: xilinx: Remove eemi ops for clock set/get rate") 0667a8d144bc ("firmware: xilinx: Remove eemi ops for clock_getdivider") fc9fb8fb985c ("firmware: xilinx: Remove eemi ops for clock_setdivider") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 1527b076ae2cb6a9c590a02725ed39399fcad1cf Mon Sep 17 00:00:00 2001 From: Johan Hovold <johan+linaro(a)kernel.org> Date: Thu, 22 Jun 2023 10:24:35 +0200 Subject: [PATCH] spi: zynqmp-gqspi: fix clock imbalance on probe failure Make sure that the device is not runtime suspended before explicitly disabling the clocks on probe failure and on driver unbind to avoid a clock enable-count imbalance. Fixes: 9e3a000362ae ("spi: zynqmp: Add pm runtime support") Cc: stable(a)vger.kernel.org # 4.19 Cc: Naga Sureshkumar Relli <naga.sureshkumar.relli(a)xilinx.com> Cc: Shubhrajyoti Datta <shubhrajyoti.datta(a)xilinx.com> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> Link: https://lore.kernel.org/r/Message-Id: <20230622082435.7873-1-johan+linaro(a)kernel.org> Signed-off-by: Mark Brown <broonie(a)kernel.org> diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index fb2ca9b90eab..c309dedfd602 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1342,9 +1342,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) return 0; clk_dis_all: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_dis_pclk: clk_disable_unprepare(xqspi->pclk); @@ -1368,11 +1368,15 @@ static void zynqmp_qspi_remove(struct platform_device *pdev) { struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev); + pm_runtime_get_sync(&pdev->dev); + zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0); + + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_disable_unprepare(xqspi->pclk); - pm_runtime_set_suspended(&pdev->dev); - pm_runtime_disable(&pdev->dev); } MODULE_DEVICE_TABLE(of, zynqmp_qspi_of_match);

2 years

1
0
0 0

FAILED: patch "[PATCH] spi: zynqmp-gqspi: fix clock imbalance on probe failure" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 1527b076ae2cb6a9c590a02725ed39399fcad1cf # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100413-clothing-muscular-2532@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: 1527b076ae2c ("spi: zynqmp-gqspi: fix clock imbalance on probe failure") 3ffefa1d9c9e ("spi: zynqmp-gqspi: Convert to platform remove callback returning void") 58eaa7b2d07d ("spi: spi-zynqmp-gqspi: Fix runtime PM imbalance in zynqmp_qspi_probe") 1c26372e5aa9 ("spi: spi-zynqmp-gqspi: Update driver to use spi-mem framework") 91af6eb04a6b ("spi: spi-zynqmp-gqspi: Fix kernel-doc warnings") 4b42b0b49812 ("spi: spi-zynqmp-gqspi: Correct a couple of misspellings in kerneldoc") 4db8180ffe7c ("firmware: xilinx: Remove eemi ops for fpga related APIs") bc86f9c54616 ("firmware: xilinx: Remove eemi ops for aes engine") cbbbda71fe37 ("firmware: xilinx: Remove eemi ops for set_requirement") 07fb1a4619fc ("firmware: xilinx: Remove eemi ops for release_node") bf8b27ed2324 ("firmware: xilinx: Remove eemi ops for request_node") 951d0a97e41c ("firmware: xilinx: Remove eemi ops for set_suspend_mode") 9474da950d1e ("firmware: xilinx: Remove eemi ops for init_finalize") 1b413581fe26 ("firmware: xilinx: Remove eemi ops for reset_get_status") cf23ec353146 ("firmware: xilinx: Remove eemi ops for reset_assert") 426c8d85df7a ("firmware: xilinx: Use APIs instead of IOCTLs") 70c0d36462ca ("firmware: xilinx: Remove eemi ops for clock set/get parent") 7a1e10621a21 ("firmware: xilinx: Remove eemi ops for clock set/get rate") 0667a8d144bc ("firmware: xilinx: Remove eemi ops for clock_getdivider") fc9fb8fb985c ("firmware: xilinx: Remove eemi ops for clock_setdivider") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 1527b076ae2cb6a9c590a02725ed39399fcad1cf Mon Sep 17 00:00:00 2001 From: Johan Hovold <johan+linaro(a)kernel.org> Date: Thu, 22 Jun 2023 10:24:35 +0200 Subject: [PATCH] spi: zynqmp-gqspi: fix clock imbalance on probe failure Make sure that the device is not runtime suspended before explicitly disabling the clocks on probe failure and on driver unbind to avoid a clock enable-count imbalance. Fixes: 9e3a000362ae ("spi: zynqmp: Add pm runtime support") Cc: stable(a)vger.kernel.org # 4.19 Cc: Naga Sureshkumar Relli <naga.sureshkumar.relli(a)xilinx.com> Cc: Shubhrajyoti Datta <shubhrajyoti.datta(a)xilinx.com> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> Link: https://lore.kernel.org/r/Message-Id: <20230622082435.7873-1-johan+linaro(a)kernel.org> Signed-off-by: Mark Brown <broonie(a)kernel.org> diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index fb2ca9b90eab..c309dedfd602 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1342,9 +1342,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) return 0; clk_dis_all: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_dis_pclk: clk_disable_unprepare(xqspi->pclk); @@ -1368,11 +1368,15 @@ static void zynqmp_qspi_remove(struct platform_device *pdev) { struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev); + pm_runtime_get_sync(&pdev->dev); + zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0); + + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_disable_unprepare(xqspi->pclk); - pm_runtime_set_suspended(&pdev->dev); - pm_runtime_disable(&pdev->dev); } MODULE_DEVICE_TABLE(of, zynqmp_qspi_of_match);

2 years

1
0
0 0

FAILED: patch "[PATCH] spi: zynqmp-gqspi: fix clock imbalance on probe failure" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 1527b076ae2cb6a9c590a02725ed39399fcad1cf # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100405-anaconda-unclog-d247@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: 1527b076ae2c ("spi: zynqmp-gqspi: fix clock imbalance on probe failure") 3ffefa1d9c9e ("spi: zynqmp-gqspi: Convert to platform remove callback returning void") 58eaa7b2d07d ("spi: spi-zynqmp-gqspi: Fix runtime PM imbalance in zynqmp_qspi_probe") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 1527b076ae2cb6a9c590a02725ed39399fcad1cf Mon Sep 17 00:00:00 2001 From: Johan Hovold <johan+linaro(a)kernel.org> Date: Thu, 22 Jun 2023 10:24:35 +0200 Subject: [PATCH] spi: zynqmp-gqspi: fix clock imbalance on probe failure Make sure that the device is not runtime suspended before explicitly disabling the clocks on probe failure and on driver unbind to avoid a clock enable-count imbalance. Fixes: 9e3a000362ae ("spi: zynqmp: Add pm runtime support") Cc: stable(a)vger.kernel.org # 4.19 Cc: Naga Sureshkumar Relli <naga.sureshkumar.relli(a)xilinx.com> Cc: Shubhrajyoti Datta <shubhrajyoti.datta(a)xilinx.com> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> Link: https://lore.kernel.org/r/Message-Id: <20230622082435.7873-1-johan+linaro(a)kernel.org> Signed-off-by: Mark Brown <broonie(a)kernel.org> diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index fb2ca9b90eab..c309dedfd602 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1342,9 +1342,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) return 0; clk_dis_all: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_dis_pclk: clk_disable_unprepare(xqspi->pclk); @@ -1368,11 +1368,15 @@ static void zynqmp_qspi_remove(struct platform_device *pdev) { struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev); + pm_runtime_get_sync(&pdev->dev); + zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0); + + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_disable_unprepare(xqspi->pclk); - pm_runtime_set_suspended(&pdev->dev); - pm_runtime_disable(&pdev->dev); } MODULE_DEVICE_TABLE(of, zynqmp_qspi_of_match);

2 years

1
0
0 0

FAILED: patch "[PATCH] spi: zynqmp-gqspi: fix clock imbalance on probe failure" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 1527b076ae2cb6a9c590a02725ed39399fcad1cf # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100458-lullaby-relearn-726c@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: 1527b076ae2c ("spi: zynqmp-gqspi: fix clock imbalance on probe failure") 3ffefa1d9c9e ("spi: zynqmp-gqspi: Convert to platform remove callback returning void") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 1527b076ae2cb6a9c590a02725ed39399fcad1cf Mon Sep 17 00:00:00 2001 From: Johan Hovold <johan+linaro(a)kernel.org> Date: Thu, 22 Jun 2023 10:24:35 +0200 Subject: [PATCH] spi: zynqmp-gqspi: fix clock imbalance on probe failure Make sure that the device is not runtime suspended before explicitly disabling the clocks on probe failure and on driver unbind to avoid a clock enable-count imbalance. Fixes: 9e3a000362ae ("spi: zynqmp: Add pm runtime support") Cc: stable(a)vger.kernel.org # 4.19 Cc: Naga Sureshkumar Relli <naga.sureshkumar.relli(a)xilinx.com> Cc: Shubhrajyoti Datta <shubhrajyoti.datta(a)xilinx.com> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> Link: https://lore.kernel.org/r/Message-Id: <20230622082435.7873-1-johan+linaro(a)kernel.org> Signed-off-by: Mark Brown <broonie(a)kernel.org> diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index fb2ca9b90eab..c309dedfd602 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1342,9 +1342,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) return 0; clk_dis_all: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_dis_pclk: clk_disable_unprepare(xqspi->pclk); @@ -1368,11 +1368,15 @@ static void zynqmp_qspi_remove(struct platform_device *pdev) { struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev); + pm_runtime_get_sync(&pdev->dev); + zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0); + + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_disable_unprepare(xqspi->pclk); - pm_runtime_set_suspended(&pdev->dev); - pm_runtime_disable(&pdev->dev); } MODULE_DEVICE_TABLE(of, zynqmp_qspi_of_match);

2 years

1
0
0 0

FAILED: patch "[PATCH] spi: zynqmp-gqspi: fix clock imbalance on probe failure" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 1527b076ae2cb6a9c590a02725ed39399fcad1cf # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023100450-unnamable-reemerge-dcef@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: 1527b076ae2c ("spi: zynqmp-gqspi: fix clock imbalance on probe failure") 3ffefa1d9c9e ("spi: zynqmp-gqspi: Convert to platform remove callback returning void") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 1527b076ae2cb6a9c590a02725ed39399fcad1cf Mon Sep 17 00:00:00 2001 From: Johan Hovold <johan+linaro(a)kernel.org> Date: Thu, 22 Jun 2023 10:24:35 +0200 Subject: [PATCH] spi: zynqmp-gqspi: fix clock imbalance on probe failure Make sure that the device is not runtime suspended before explicitly disabling the clocks on probe failure and on driver unbind to avoid a clock enable-count imbalance. Fixes: 9e3a000362ae ("spi: zynqmp: Add pm runtime support") Cc: stable(a)vger.kernel.org # 4.19 Cc: Naga Sureshkumar Relli <naga.sureshkumar.relli(a)xilinx.com> Cc: Shubhrajyoti Datta <shubhrajyoti.datta(a)xilinx.com> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> Link: https://lore.kernel.org/r/Message-Id: <20230622082435.7873-1-johan+linaro(a)kernel.org> Signed-off-by: Mark Brown <broonie(a)kernel.org> diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index fb2ca9b90eab..c309dedfd602 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1342,9 +1342,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev) return 0; clk_dis_all: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_dis_pclk: clk_disable_unprepare(xqspi->pclk); @@ -1368,11 +1368,15 @@ static void zynqmp_qspi_remove(struct platform_device *pdev) { struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev); + pm_runtime_get_sync(&pdev->dev); + zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0); + + pm_runtime_disable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); clk_disable_unprepare(xqspi->refclk); clk_disable_unprepare(xqspi->pclk); - pm_runtime_set_suspended(&pdev->dev); - pm_runtime_disable(&pdev->dev); } MODULE_DEVICE_TABLE(of, zynqmp_qspi_of_match);

2 years

1
0
0 0

[PATCH] nfs/super: check NFS_CAP_ACLS instead of the NFS version

by Max Kellermann

This sets SB_POSIXACL only if ACL support is really enabled, instead of always setting SB_POSIXACL if the NFS protocol version theoretically supports ACL. The code comment says "We will [apply the umask] ourselves", but that happens in posix_acl_create() only if the kernel has POSIX ACL support. Without it, posix_acl_create() is an empty dummy function. So let's not pretend we will apply the umask if we can already know that we will never. This fixes a problem where the umask is always ignored in the NFS client when compiled without CONFIG_FS_POSIX_ACL. This is a 4 year old regression caused by commit 013cdf1088d723 which itself was not completely wrong, but failed to consider all the side effects by misdesigned VFS code. Reviewed-by: J. Bruce Fields <bfields(a)redhat.com> Cc: stable(a)vger.kernel.org Signed-off-by: Max Kellermann <max.kellermann(a)ionos.com> --- fs/nfs/super.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0d6473cb00cb..051986b422b0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1064,14 +1064,19 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) * The VFS shouldn't apply the umask to mode bits. * We will do so ourselves when necessary. */ - sb->s_flags |= SB_POSIXACL; + if (NFS_SB(sb)->caps & NFS_CAP_ACLS) { + sb->s_flags |= SB_POSIXACL; + } + sb->s_time_gran = 1; sb->s_time_min = 0; sb->s_time_max = U32_MAX; sb->s_export_op = &nfs_export_ops; break; case 4: - sb->s_flags |= SB_POSIXACL; + if (NFS_SB(sb)->caps & NFS_CAP_ACLS) { + sb->s_flags |= SB_POSIXACL; + } sb->s_time_gran = 1; sb->s_time_min = S64_MIN; sb->s_time_max = S64_MAX; -- 2.39.2

2 years

2
1
0 0

[PATCH v1] can: j1939: Fix UAF in j1939_sk_match_filter during setsockopt(SO_J1939_FILTER)

by Oleksij Rempel

Lock jsk->sk to prevent UAF when setsockopt(..., SO_J1939_FILTER, ...) modifies jsk->filters while receiving packets. Following trace was seen on affected system: ================================================================== BUG: KASAN: slab-use-after-free in j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] Read of size 4 at addr ffff888012144014 by task j1939/350 CPU: 0 PID: 350 Comm: j1939 Tainted: G W OE 6.5.0-rc5 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: print_report+0xd3/0x620 ? kasan_complete_mode_report_info+0x7d/0x200 ? j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] kasan_report+0xc2/0x100 ? j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] __asan_load4+0x84/0xb0 j1939_sk_recv_match_one+0x1af/0x2d0 [can_j1939] j1939_sk_recv+0x20b/0x320 [can_j1939] ? __kasan_check_write+0x18/0x20 ? __pfx_j1939_sk_recv+0x10/0x10 [can_j1939] ? j1939_simple_recv+0x69/0x280 [can_j1939] ? j1939_ac_recv+0x5e/0x310 [can_j1939] j1939_can_recv+0x43f/0x580 [can_j1939] ? __pfx_j1939_can_recv+0x10/0x10 [can_j1939] ? raw_rcv+0x42/0x3c0 [can_raw] ? __pfx_j1939_can_recv+0x10/0x10 [can_j1939] can_rcv_filter+0x11f/0x350 [can] can_receive+0x12f/0x190 [can] ? __pfx_can_rcv+0x10/0x10 [can] can_rcv+0xdd/0x130 [can] ? __pfx_can_rcv+0x10/0x10 [can] __netif_receive_skb_one_core+0x13d/0x150 ? __pfx___netif_receive_skb_one_core+0x10/0x10 ? __kasan_check_write+0x18/0x20 ? _raw_spin_lock_irq+0x8c/0xe0 __netif_receive_skb+0x23/0xb0 process_backlog+0x107/0x260 __napi_poll+0x69/0x310 net_rx_action+0x2a1/0x580 ? __pfx_net_rx_action+0x10/0x10 ? __pfx__raw_spin_lock+0x10/0x10 ? handle_irq_event+0x7d/0xa0 __do_softirq+0xf3/0x3f8 do_softirq+0x53/0x80 </IRQ> <TASK> __local_bh_enable_ip+0x6e/0x70 netif_rx+0x16b/0x180 can_send+0x32b/0x520 [can] ? __pfx_can_send+0x10/0x10 [can] ? __check_object_size+0x299/0x410 raw_sendmsg+0x572/0x6d0 [can_raw] ? __pfx_raw_sendmsg+0x10/0x10 [can_raw] ? apparmor_socket_sendmsg+0x2f/0x40 ? __pfx_raw_sendmsg+0x10/0x10 [can_raw] sock_sendmsg+0xef/0x100 sock_write_iter+0x162/0x220 ? __pfx_sock_write_iter+0x10/0x10 ? __rtnl_unlock+0x47/0x80 ? security_file_permission+0x54/0x320 vfs_write+0x6ba/0x750 ? __pfx_vfs_write+0x10/0x10 ? __fget_light+0x1ca/0x1f0 ? __rcu_read_unlock+0x5b/0x280 ksys_write+0x143/0x170 ? __pfx_ksys_write+0x10/0x10 ? __kasan_check_read+0x15/0x20 ? fpregs_assert_state_consistent+0x62/0x70 __x64_sys_write+0x47/0x60 do_syscall_64+0x60/0x90 ? do_syscall_64+0x6d/0x90 ? irqentry_exit+0x3f/0x50 ? exc_page_fault+0x79/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Allocated by task 348: kasan_save_stack+0x2a/0x50 kasan_set_track+0x29/0x40 kasan_save_alloc_info+0x1f/0x30 __kasan_kmalloc+0xb5/0xc0 __kmalloc_node_track_caller+0x67/0x160 j1939_sk_setsockopt+0x284/0x450 [can_j1939] __sys_setsockopt+0x15c/0x2f0 __x64_sys_setsockopt+0x6b/0x80 do_syscall_64+0x60/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Freed by task 349: kasan_save_stack+0x2a/0x50 kasan_set_track+0x29/0x40 kasan_save_free_info+0x2f/0x50 __kasan_slab_free+0x12e/0x1c0 __kmem_cache_free+0x1b9/0x380 kfree+0x7a/0x120 j1939_sk_setsockopt+0x3b2/0x450 [can_j1939] __sys_setsockopt+0x15c/0x2f0 __x64_sys_setsockopt+0x6b/0x80 do_syscall_64+0x60/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 9d71dd0c70099 ("can: add support of SAE J1939 protocol") Reported-by: Sili Luo <rootlab(a)huawei.com> Suggested-by: Sili Luo <rootlab(a)huawei.com> Signed-off-by: Oleksij Rempel <o.rempel(a)pengutronix.de> Acked-by: Oleksij Rempel <o.rempel(a)pengutronix.de> Cc: stable(a)vger.kernel.org --- net/can/j1939/socket.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b28c976f52a0..2ce24bf78c72 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk, static bool j1939_sk_match_filter(struct j1939_sock *jsk, const struct j1939_sk_buff_cb *skcb) { - const struct j1939_filter *f = jsk->filters; - int nfilter = jsk->nfilters; + const struct j1939_filter *f; + int nfilter; + + lock_sock(&jsk->sk); + + f = jsk->filters; + nfilter = jsk->nfilters; if (!nfilter) /* receive all when no filters are assigned */ - return true; + goto filter_match_found; for (; nfilter; ++f, --nfilter) { if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) @@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk, continue; if ((skcb->addr.src_name & f->name_mask) != f->name) continue; - return true; + goto filter_match_found; } + + release_sock(&jsk->sk); return false; + +filter_match_found: + release_sock(&jsk->sk); + return true; } static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, -- 2.39.2

2 years

2
2
0 0

[PATCH][6.5, 6.1, 5.15] iommu/arm-smmu-v3: Fix soft lockup triggered by arm_smmu_mm_invalidate_range

by Nicolin Chen

commit d5afb4b47e13161b3f33904d45110f9e6463bad6 upstream. When running an SVA case, the following soft lockup is triggered: -------------------------------------------------------------------- watchdog: BUG: soft lockup - CPU#244 stuck for 26s! pstate: 83400009 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : arm_smmu_cmdq_issue_cmdlist+0x178/0xa50 lr : arm_smmu_cmdq_issue_cmdlist+0x150/0xa50 sp : ffff8000d83ef290 x29: ffff8000d83ef290 x28: 000000003b9aca00 x27: 0000000000000000 x26: ffff8000d83ef3c0 x25: da86c0812194a0e8 x24: 0000000000000000 x23: 0000000000000040 x22: ffff8000d83ef340 x21: ffff0000c63980c0 x20: 0000000000000001 x19: ffff0000c6398080 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: ffff3000b4a3bbb0 x14: ffff3000b4a30888 x13: ffff3000b4a3cf60 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : ffffc08120e4d6bc x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000048cfa x5 : 0000000000000000 x4 : 0000000000000001 x3 : 000000000000000a x2 : 0000000080000000 x1 : 0000000000000000 x0 : 0000000000000001 Call trace: arm_smmu_cmdq_issue_cmdlist+0x178/0xa50 __arm_smmu_tlb_inv_range+0x118/0x254 arm_smmu_tlb_inv_range_asid+0x6c/0x130 arm_smmu_mm_invalidate_range+0xa0/0xa4 __mmu_notifier_invalidate_range_end+0x88/0x120 unmap_vmas+0x194/0x1e0 unmap_region+0xb4/0x144 do_mas_align_munmap+0x290/0x490 do_mas_munmap+0xbc/0x124 __vm_munmap+0xa8/0x19c __arm64_sys_munmap+0x28/0x50 invoke_syscall+0x78/0x11c el0_svc_common.constprop.0+0x58/0x1c0 do_el0_svc+0x34/0x60 el0_svc+0x2c/0xd4 el0t_64_sync_handler+0x114/0x140 el0t_64_sync+0x1a4/0x1a8 -------------------------------------------------------------------- The commit 06ff87bae8d3 ("arm64: mm: remove unused functions and variable protoypes") fixed a similar lockup on the CPU MMU side. Yet, it can occur to SMMU too since arm_smmu_mm_invalidate_range() is typically called next to MMU tlb flush function, e.g. tlb_flush_mmu_tlbonly { tlb_flush { __flush_tlb_range { // check MAX_TLBI_OPS } } mmu_notifier_invalidate_range { arm_smmu_mm_invalidate_range { // does not check MAX_TLBI_OPS } } } Clone a CMDQ_MAX_TLBI_OPS from the MAX_TLBI_OPS in tlbflush.h, since in an SVA case SMMU uses the CPU page table, so it makes sense to align with the tlbflush code. Then, replace per-page TLBI commands with a single per-asid TLBI command, if the request size hits this threshold. Signed-off-by: Nicolin Chen <nicolinc(a)nvidia.com> Link: https://lore.kernel.org/r/20230920052257.8615-1-nicolinc@nvidia.com Signed-off-by: Will Deacon <will(a)kernel.org> --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index a5a63b1c947e..98d3ba7f9487 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -186,6 +186,15 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd) } } +/* + * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this + * is used as a threshold to replace per-page TLBI commands to issue in the + * command queue with an address-space TLBI command, when SMMU w/o a range + * invalidation feature handles too many per-page TLBI commands, which will + * otherwise result in a soft lockup. + */ +#define CMDQ_MAX_TLBI_OPS (1 << (PAGE_SHIFT - 3)) + static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) @@ -200,10 +209,22 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, * range. So do a simple translation here by calculating size correctly. */ size = end - start; + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_RANGE_INV)) { + if (size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE) + size = 0; + } + + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) { + if (!size) + arm_smmu_tlb_inv_asid(smmu_domain->smmu, + smmu_mn->cd->asid); + else + arm_smmu_tlb_inv_range_asid(start, size, + smmu_mn->cd->asid, + PAGE_SIZE, false, + smmu_domain); + } - if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) - arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid, - PAGE_SIZE, false, smmu_domain); arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size); } -- 2.42.0

2 years

3
3
0 0

Request to cherry-pick a few Smack fixes

by Munehisa Kamata

Hi stable maintainers, The following upstream commits fix a couple of overlayfs handling problems in Smack. Could you please cherry-pick them for 4.19.y, 5.4.y, 5.10.y, 5.15.y and 6.1.y? #3 2c085f3a8f23 smack: Record transmuting in smk_transmuted #2 3a3d8fce31a4 smack: Retrieve transmuting information in smack_inode_getsecurity() #1 387ef964460f Smack:- Use overlay inode label in smack_inode_copy_up() # not needed for 6.1.y Note that 4.19.y needs some adjustments. I'll send backported patches separately. I'm not an author of these commits, but have been hitting the problems with multiple kernels based on the trees and it seems worth cherry-picking. Perhaps, it's better to add a tag in cherry-picked commits: Fixes: d6d80cb57be4 ("Smack: Base support for overlayfs") Regards, Munehisa

2 years

2
7
0 0

[PATCH v3] can: sja1000: Always restart the Tx queue after an overrun

by Miquel Raynal

Upstream commit 717c6ec241b5 ("can: sja1000: Prevent overrun stalls with a soft reset on Renesas SoCs") fixes an issue with Renesas own SJA1000 CAN controller reception: the Rx buffer is only 5 messages long, so when the bus loaded (eg. a message every 50us), overrun may easily happen. Upon an overrun situation, due to a possible internal crosstalk situation, the controller enters a frozen state which only can be unlocked with a soft reset (experimentally). The solution was to offload a call to sja1000_start() in a threaded handler. This needs to happen in process context as this operation requires to sleep. sja1000_start() basically enters "reset mode", performs a proper software reset and returns back into "normal mode". Since this fix was introduced, we no longer observe any stalls in reception. However it was sporadically observed that the transmit path would now freeze. Further investigation blamed the fix mentioned above, and especially the reset operation. Reproducing the reset in a loop helped identifying what could possibly go wrong. The sja1000 is a single Tx queue device, which leverages the netdev helpers to process one Tx message at a time. The logic is: the queue is stopped, the message sent to the transceiver, once properly transmitted the controller sets a status bit which triggers an interrupt, in the interrupt handler the transmission status is checked and the queue woken up. Unfortunately, if an overrun happens, we might perform the soft reset precisely between the transmission of the buffer to the transceiver and the advent of the transmission status bit. We would then stop the transmission operation without re-enabling the queue, leading to all further transmissions to be ignored. The reset interrupt can only happen while the device is "open", and after a reset we anyway want to resume normal operations, no matter if a packet to transmit got dropped in the process, so we shall wake up the queue. Restarting the device and waking-up the queue is exactly what sja1000_set_mode(CAN_MODE_START) does. In order to be consistent about the queue state, we must acquire a lock both in the reset handler and in the transmit path to ensure serialization of both operations. It turns out, a lock is already held when entering the transmit path, so we can just acquire/release it as well with the regular net helpers inside the threaded interrupt handler and this way we should be safe. As the reset handler might still be called after the transmission of a frame to the transceiver but before it actually gets transmitted, we must ensure we don't leak the skb, so we free it (the behavior is consistent, no matter if there was an skb on the stack or not). Fixes: 717c6ec241b5 ("can: sja1000: Prevent overrun stalls with a soft reset on Renesas SoCs") Cc: stable(a)vger.kernel.org Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com> --- Changes in v3: * Fix new implementation by just acquiring the tx lock when required. Changes in v2: * As Marc sugested, use netif_tx_{,un}lock() instead of our own spin_lock. drivers/net/can/sja1000/sja1000.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index ae47fc72aa96..9531684d47cd 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -396,7 +396,13 @@ static irqreturn_t sja1000_reset_interrupt(int irq, void *dev_id) struct net_device *dev = (struct net_device *)dev_id; netdev_dbg(dev, "performing a soft reset upon overrun\n"); - sja1000_start(dev); + + netif_tx_lock(dev); + + can_free_echo_skb(dev, 0); + sja1000_set_mode(dev, CAN_MODE_START); + + netif_tx_unlock(dev); return IRQ_HANDLED; } -- 2.34.1

2 years

2
2
0 0

[PATCH 13/16] drm/amd/display: Don't set dpms_off for seamless boot

by Tom Chung

From: Daniel Miess <daniel.miess(a)amd.com> [Why] eDPs fail to light up with seamless boot enabled [How] When seamless boot is enabled don't configure dpms_off in disable_vbios_mode_if_required. Reviewed-by: Charlene Liu <charlene.liu(a)amd.com> Cc: Mario Limonciello <mario.limonciello(a)amd.com> Cc: Alex Deucher <alexander.deucher(a)amd.com> Cc: stable(a)vger.kernel.org Acked-by: Tom Chung <chiahsuan.chung(a)amd.com> Signed-off-by: Daniel Miess <daniel.miess(a)amd.com> --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index bd4834f921c1..88d41bf6d53a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1230,6 +1230,9 @@ static void disable_vbios_mode_if_required( if (stream == NULL) continue; + if (stream->apply_seamless_boot_optimization) + continue; + // only looking for first odm pipe if (pipe->prev_odm_pipe) continue; -- 2.25.1

2 years

1
0
0 0

[PATCH 05/16] drm/amd/display: Revert "drm/amd/display: remove duplicated edp relink to fastboot"

by Tom Chung

From: Aric Cyr <aric.cyr(a)amd.com> Revert commit a0b8a2c85d1b ("drm/amd/display: remove duplicated edp relink to fastboot") Because it cause 4k EDP not light up on boot Reviewed-by: Tom Chung <chiahsuan.chung(a)amd.com> Cc: Mario Limonciello <mario.limonciello(a)amd.com> Cc: Alex Deucher <alexander.deucher(a)amd.com> Cc: stable(a)vger.kernel.org Acked-by: Tom Chung <chiahsuan.chung(a)amd.com> Signed-off-by: Aric Cyr <aric.cyr(a)amd.com> --- drivers/gpu/drm/amd/display/dc/core/dc.c | 59 ++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 63e97fb0a478..17a36953d3a9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1213,6 +1213,64 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) dc_release_state(current_ctx); } +static void disable_vbios_mode_if_required( + struct dc *dc, + struct dc_state *context) +{ + unsigned int i, j; + + /* check if timing_changed, disable stream*/ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct dc_stream_state *stream = NULL; + struct dc_link *link = NULL; + struct pipe_ctx *pipe = NULL; + + pipe = &context->res_ctx.pipe_ctx[i]; + stream = pipe->stream; + if (stream == NULL) + continue; + + // only looking for first odm pipe + if (pipe->prev_odm_pipe) + continue; + + if (stream->link->local_sink && + stream->link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { + link = stream->link; + } + + if (link != NULL && link->link_enc->funcs->is_dig_enabled(link->link_enc)) { + unsigned int enc_inst, tg_inst = 0; + unsigned int pix_clk_100hz; + + enc_inst = link->link_enc->funcs->get_dig_frontend(link->link_enc); + if (enc_inst != ENGINE_ID_UNKNOWN) { + for (j = 0; j < dc->res_pool->stream_enc_count; j++) { + if (dc->res_pool->stream_enc[j]->id == enc_inst) { + tg_inst = dc->res_pool->stream_enc[j]->funcs->dig_source_otg( + dc->res_pool->stream_enc[j]); + break; + } + } + + dc->res_pool->dp_clock_source->funcs->get_pixel_clk_frequency_100hz( + dc->res_pool->dp_clock_source, + tg_inst, &pix_clk_100hz); + + if (link->link_status.link_active) { + uint32_t requested_pix_clk_100hz = + pipe->stream_res.pix_clk_params.requested_pix_clk_100hz; + + if (pix_clk_100hz != requested_pix_clk_100hz) { + dc->link_srv->set_dpms_off(pipe); + pipe->stream->dpms_off = false; + } + } + } + } + } +} + static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context) { int i; @@ -1782,6 +1840,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_streams[i] = context->streams[i]; if (!dcb->funcs->is_accelerated_mode(dcb)) { + disable_vbios_mode_if_required(dc, context); dc->hwss.enable_accelerated_mode(dc, context); } -- 2.25.1

2 years

1
0
0 0

v6.5 backport request: 6d2779ecaeb56f92 ("locking/atomic: scripts: fix fallback ifdeffery")

by Mark Rutland

Hi, Could we please backport commit: 6d2779ecaeb56f92d7105c56772346c71c88c278 ("locking/atomic: scripts: fix fallback ifdeffery") ... to the 6.5.y stable tree? I forgot to Cc stable when I submitted the original patch, and had (mistakenly) assumed that the Fixes tag was sufficient. The patch fixes a dentry cache corruption issue observed on arm64 and which is in theory possible on other architectures. I've recevied an off-list report from someone who's hit the issue on the v6.5.y tree specifically. Thanks, Mark

2 years

2
2
0 0

Re: [PATCH] usb: core: add bos NULL pointer checking condition

by Greg KH

On Wed, Oct 04, 2023 at 03:26:42PM +0900, Woo-kwang Lee wrote: > This issue occurs when connecting Galaxy S22 and abnormal SEC Dex Adapter. > When the abnormal adapter is connected, kernel panic always occurs after a > few seconds. > This occurs due to unable to get BOS descriptor, usb_release_bos_descriptor > set dev->bos = NULL. > > - usb_reset_and_verify_device > - hub_port_init > - usb_release_bos_descriptor > - dev->bos = NULL; > > hub_port_connect_change() calls portspeed(), and portspeed() calls hub_is_s > uperspeedplus(). > Finally, hub_is_superspeedplus() calls hdev->bos->ssp_cap. > It needs to check hdev->bos is NULL to prevent a kernel panic. > > usb 3-1: new SuperSpeed Gen 1 USB device number 16 using xhci-hcd-exynos > usb 3-1: unable to get BOS descriptor set > usb 3-1: Product: USB3.0 Hub > Unable to handle kernel NULL pointer dereference at virtual address 0000018 > > Call trace: > hub_port_connect_change+0x8c/0x538 > port_event+0x244/0x764 > hub_event+0x158/0x474 > process_one_work+0x204/0x550 > worker_thread+0x28c/0x580 > kthread+0x13c/0x178 > ret_from_fork+0x10/0x30 > > - hub_port_connect_change > - portspeed > - hub_is_superspeedplus > > Fixes: 0cdd49a1d1a4 ("usb: Support USB 3.1 extended port status request") > Signed-off-by: Woo-kwang Lee <wookwang.lee(a)samsung.com> > --- > drivers/usb/core/hub.h | 2 ++ > 1 file changed, 2 insertions(+) Are you sure this isn't already fixed by commit f74a7afc224a ("usb: hub: Guard against accesses to uninitialized BOS descriptors") in linux-next? thanks, greg k-h

2 years

3
3
0 0

[PATCH] drm/mediatek: Correctly free sg_table in gem prime vmap

by Chen-Yu Tsai

The MediaTek DRM driver implements GEM PRIME vmap by fetching the sg_table for the object, iterating through the pages, and then vmapping them. In essence, unlike the GEM DMA helpers which vmap when the object is first created or imported, the MediaTek version does it on request. Unfortunately, the code never correctly frees the sg_table contents. This results in a kernel memory leak. On a Hayato device with a text console on the internal display, this results in the system running out of memory in a few days from all the console screen cursor updates. Add sg_free_table() to correctly free the contents of the sg_table. This was missing despite explicitly required by mtk_gem_prime_get_sg_table(). Fixes: 3df64d7b0a4f ("drm/mediatek: Implement gem prime vmap/vunmap function") Cc: <stable(a)vger.kernel.org> Signed-off-by: Chen-Yu Tsai <wenst(a)chromium.org> --- Please merge for v6.6 fixes. Also, I was wondering why the MediaTek DRM driver implements a lot of the GEM functionality itself, instead of using the GEM DMA helpers. From what I could tell, the code closely follows the DMA helpers, except that it vmaps the buffers only upon request. drivers/gpu/drm/mediatek/mtk_drm_gem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index 9f364df52478..297ee090e02e 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -239,6 +239,7 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) npages = obj->size >> PAGE_SHIFT; mtk_gem->pages = kcalloc(npages, sizeof(*mtk_gem->pages), GFP_KERNEL); if (!mtk_gem->pages) { + sg_free_table(sgt); kfree(sgt); return -ENOMEM; } @@ -248,11 +249,13 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); if (!mtk_gem->kvaddr) { + sg_free_table(sgt); kfree(sgt); kfree(mtk_gem->pages); return -ENOMEM; } out: + sg_free_table(sgt); kfree(sgt); iosys_map_set_vaddr(map, mtk_gem->kvaddr); -- 2.42.0.582.g8ccd20d70d-goog

2 years

3
3
0 0

bluetooth issues since kernel 6.4 - not discovering other bt devices - /linux/drivers/bluetooth/btusb.c

by Erik Dobák

Hello! I bought a new laptop fujitsu life book and everything is going fine on artix just the bt makes trouble: /var/log/error.log Sep 30 18:43:48 nexus bluetoothd[2266]: src/adapter.c:reset_adv_monitors_complete() Failed to reset Adv Monitors: Failed (0x03) Sep 30 18:43:48 nexus bluetoothd[2266]: Failed to clear UUIDs: Failed (0x03) Sep 30 18:43:48 nexus bluetoothd[2266]: Failed to add UUID: Failed (0x03) Sep 30 18:43:48 nexus bluetoothd[2266]: Failed to add UUID: Failed (0x03) i searched a bit the webs and found a new commit at kernel org that does do the trouble: https://bugs.archlinux.org/task/78980 follow the linkeys inside the commits there or read this one: ---------------before------------------------------------ /* interface numbers are hardcoded in the spec */ if (intf->cur_altsetting->desc.bInterfaceNumber != 0) { if (!(id->driver_info & BTUSB_IFNUM_2)) return -ENODEV; if (intf->cur_altsetting->desc.bInterfaceNumber != 2) return -ENODEV; } -----------after---------------------------------------------------- if ((id->driver_info & BTUSB_IFNUM_2) && (intf->cur_altsetting->desc.bInterfaceNumber != 0) && (intf->cur_altsetting->desc.bInterfaceNumber != 2)) return -ENODEV; -------------------------------------------------------- the dude just hooked up 3 conditions in a row with && where before it was 2 conditions in 1 condition. + the comment was removed. please reconsider this commit. Yours E

2 years

5
10
0 0

Re: [PATCH] usb: core: add bos NULL pointer checking condition

by kernel test robot

Hi, Thanks for your patch. FYI: kernel test robot notices the stable kernel rule is not satisfied. The check is based on https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html/#op… Rule: add the tag "Cc: stable(a)vger.kernel.org" in the sign-off area to have the patch automatically included in the stable tree. Subject: [PATCH] usb: core: add bos NULL pointer checking condition Link: https://lore.kernel.org/stable/20231004062642.16431-1-wookwang.lee%40samsun… -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki

2 years

1
0
0 0

+ hugetlbfs-close-race-between-madv_dontneed-and-page-fault.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: hugetlbfs: close race between MADV_DONTNEED and page fault has been added to the -mm mm-hotfixes-unstable branch. Its filename is hugetlbfs-close-race-between-madv_dontneed-and-page-fault.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Rik van Riel <riel(a)surriel.com> Subject: hugetlbfs: close race between MADV_DONTNEED and page fault Date: Tue, 3 Oct 2023 23:25:04 -0400 Malloc libraries, like jemalloc and tcalloc, take decisions on when to call madvise independently from the code in the main application. This sometimes results in the application page faulting on an address, right after the malloc library has shot down the backing memory with MADV_DONTNEED. Usually this is harmless, because we always have some 4kB pages sitting around to satisfy a page fault. However, with hugetlbfs systems often allocate only the exact number of huge pages that the application wants. Due to TLB batching, hugetlbfs MADV_DONTNEED will free pages outside of any lock taken on the page fault path, which can open up the following race condition: CPU 1 CPU 2 MADV_DONTNEED unmap page shoot down TLB entry page fault fail to allocate a huge page killed with SIGBUS free page Fix that race by pulling the locking from __unmap_hugepage_final_range into helper functions called from zap_page_range_single. This ensures page faults stay locked out of the MADV_DONTNEED VMA until the huge pages have actually been freed. Link: https://lkml.kernel.org/r/20231004032814.3108383-3-riel@surriel.com Fixes: 04ada095dcfc ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing") Signed-off-by: Rik van Riel <riel(a)surriel.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Mike Kravetz <mike.kravetz(a)oracle.com> Cc: Muchun Song <muchun.song(a)linux.dev> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- include/linux/hugetlb.h | 35 +++++++++++++++++++++++++++++++++-- mm/hugetlb.c | 28 ++++++++++++++++------------ mm/memory.c | 13 ++++++++----- 3 files changed, 57 insertions(+), 19 deletions(-) --- a/include/linux/hugetlb.h~hugetlbfs-close-race-between-madv_dontneed-and-page-fault +++ a/include/linux/hugetlb.h @@ -139,7 +139,7 @@ struct page *hugetlb_follow_page_mask(st void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *, zap_flags_t); -void __unmap_hugepage_range_final(struct mmu_gather *tlb, +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags); @@ -246,6 +246,25 @@ int huge_pmd_unshare(struct mm_struct *m void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); +extern void __hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *begin, unsigned long *end); +extern void __hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details); + +static inline void hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_begin(vma, start, end); +} + +static inline void hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details) +{ + if (is_vm_hugetlb_page(vma)) + __hugetlb_zap_end(vma, details); +} + void hugetlb_vma_lock_read(struct vm_area_struct *vma); void hugetlb_vma_unlock_read(struct vm_area_struct *vma); void hugetlb_vma_lock_write(struct vm_area_struct *vma); @@ -298,6 +317,18 @@ static inline void adjust_range_if_pmd_s { } +static inline void hugetlb_zap_begin( + struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) +{ +} + +static inline void hugetlb_zap_end( + struct vm_area_struct *vma, + struct zap_details *details) +{ +} + static inline struct page *hugetlb_follow_page_mask( struct vm_area_struct *vma, unsigned long address, unsigned int flags, unsigned int *page_mask) @@ -443,7 +474,7 @@ static inline long hugetlb_change_protec return 0; } -static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, +static inline void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page, zap_flags_t zap_flags) --- a/mm/hugetlb.c~hugetlbfs-close-race-between-madv_dontneed-and-page-fault +++ a/mm/hugetlb.c @@ -5348,9 +5348,9 @@ int move_hugetlb_page_tables(struct vm_a return len + old_addr - old_end; } -static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, - unsigned long start, unsigned long end, - struct page *ref_page, zap_flags_t zap_flags) +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct page *ref_page, zap_flags_t zap_flags) { struct mm_struct *mm = vma->vm_mm; unsigned long address; @@ -5479,16 +5479,19 @@ static void __unmap_hugepage_range(struc tlb_flush_mmu_tlbonly(tlb); } -void __unmap_hugepage_range_final(struct mmu_gather *tlb, - struct vm_area_struct *vma, unsigned long start, - unsigned long end, struct page *ref_page, - zap_flags_t zap_flags) +void __hugetlb_zap_begin(struct vm_area_struct *vma, + unsigned long *start, unsigned long *end) { + adjust_range_if_pmd_sharing_possible(vma, start, end); hugetlb_vma_lock_write(vma); - i_mmap_lock_write(vma->vm_file->f_mapping); + if (vma->vm_file) + i_mmap_lock_write(vma->vm_file->f_mapping); +} - /* mmu notification performed in caller */ - __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); +void __hugetlb_zap_end(struct vm_area_struct *vma, + struct zap_details *details) +{ + zap_flags_t zap_flags = details ? details->zap_flags : 0; if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ /* @@ -5501,11 +5504,12 @@ void __unmap_hugepage_range_final(struct * someone else. */ __hugetlb_vma_unlock_write_free(vma); - i_mmap_unlock_write(vma->vm_file->f_mapping); } else { - i_mmap_unlock_write(vma->vm_file->f_mapping); hugetlb_vma_unlock_write(vma); } + + if (vma->vm_file) + i_mmap_unlock_write(vma->vm_file->f_mapping); } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, --- a/mm/memory.c~hugetlbfs-close-race-between-madv_dontneed-and-page-fault +++ a/mm/memory.c @@ -1692,7 +1692,7 @@ static void unmap_single_vma(struct mmu_ if (vma->vm_file) { zap_flags_t zap_flags = details ? details->zap_flags : 0; - __unmap_hugepage_range_final(tlb, vma, start, end, + __unmap_hugepage_range(tlb, vma, start, end, NULL, zap_flags); } } else @@ -1737,8 +1737,12 @@ void unmap_vmas(struct mmu_gather *tlb, start_addr, end_addr); mmu_notifier_invalidate_range_start(&range); do { - unmap_single_vma(tlb, vma, start_addr, end_addr, &details, + unsigned long start = start_addr; + unsigned long end = end_addr; + hugetlb_zap_begin(vma, &start, &end); + unmap_single_vma(tlb, vma, start, end, &details, mm_wr_locked); + hugetlb_zap_end(vma, &details); } while ((vma = mas_find(mas, tree_end - 1)) != NULL); mmu_notifier_invalidate_range_end(&range); } @@ -1762,9 +1766,7 @@ void zap_page_range_single(struct vm_are lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, address, end); - if (is_vm_hugetlb_page(vma)) - adjust_range_if_pmd_sharing_possible(vma, &range.start, - &range.end); + hugetlb_zap_begin(vma, &range.start, &range.end); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); @@ -1775,6 +1777,7 @@ void zap_page_range_single(struct vm_are unmap_single_vma(&tlb, vma, address, end, details, false); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); + hugetlb_zap_end(vma, details); } /** _ Patches currently in -mm which might be from riel(a)surriel.com are hugetlbfs-extend-hugetlb_vma_lock-to-private-vmas.patch hugetlbfs-close-race-between-madv_dontneed-and-page-fault.patch

2 years

1
0
0 0

+ hugetlbfs-extend-hugetlb_vma_lock-to-private-vmas.patch added to mm-hotfixes-unstable branch

by Andrew Morton

The patch titled Subject: hugetlbfs: extend hugetlb_vma_lock to private VMAs has been added to the -mm mm-hotfixes-unstable branch. Its filename is hugetlbfs-extend-hugetlb_vma_lock-to-private-vmas.patch This patch will shortly appear at https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche… This patch will later appear in the mm-hotfixes-unstable branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next via the mm-everything branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm and is updated there every 2-3 working days ------------------------------------------------------ From: Rik van Riel <riel(a)surriel.com> Subject: hugetlbfs: extend hugetlb_vma_lock to private VMAs Date: Tue, 3 Oct 2023 23:25:03 -0400 Extend the locking scheme used to protect shared hugetlb mappings from truncate vs page fault races, in order to protect private hugetlb mappings (with resv_map) against MADV_DONTNEED. Add a read-write semaphore to the resv_map data structure, and use that from the hugetlb_vma_(un)lock_* functions, in preparation for closing the race between MADV_DONTNEED and page faults. Link: https://lkml.kernel.org/r/20231004032814.3108383-2-riel@surriel.com Fixes: 04ada095dcfc ("hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing") Signed-off-by: Rik van Riel <riel(a)surriel.com> Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Muchun Song <muchun.song(a)linux.dev> Cc: Rik van Riel <riel(a)surriel.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> --- include/linux/hugetlb.h | 6 +++++ mm/hugetlb.c | 41 ++++++++++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 4 deletions(-) --- a/include/linux/hugetlb.h~hugetlbfs-extend-hugetlb_vma_lock-to-private-vmas +++ a/include/linux/hugetlb.h @@ -60,6 +60,7 @@ struct resv_map { long adds_in_progress; struct list_head region_cache; long region_cache_count; + struct rw_semaphore rw_sema; #ifdef CONFIG_CGROUP_HUGETLB /* * On private mappings, the counter to uncharge reservations is stored @@ -1246,6 +1247,11 @@ static inline bool __vma_shareable_lock( return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; } +static inline bool __vma_private_lock(struct vm_area_struct *vma) +{ + return (!(vma->vm_flags & VM_MAYSHARE)) && vma->vm_private_data; +} + /* * Safe version of huge_pte_offset() to check the locks. See comments * above huge_pte_offset(). --- a/mm/hugetlb.c~hugetlbfs-extend-hugetlb_vma_lock-to-private-vmas +++ a/mm/hugetlb.c @@ -97,6 +97,7 @@ static void hugetlb_vma_lock_alloc(struc static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); static void hugetlb_unshare_pmds(struct vm_area_struct *vma, unsigned long start, unsigned long end); +static struct resv_map *vma_resv_map(struct vm_area_struct *vma); static inline bool subpool_is_free(struct hugepage_subpool *spool) { @@ -267,6 +268,10 @@ void hugetlb_vma_lock_read(struct vm_are struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; down_read(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + down_read(&resv_map->rw_sema); } } @@ -276,6 +281,10 @@ void hugetlb_vma_unlock_read(struct vm_a struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; up_read(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + up_read(&resv_map->rw_sema); } } @@ -285,6 +294,10 @@ void hugetlb_vma_lock_write(struct vm_ar struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; down_write(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + down_write(&resv_map->rw_sema); } } @@ -294,17 +307,27 @@ void hugetlb_vma_unlock_write(struct vm_ struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; up_write(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + up_write(&resv_map->rw_sema); } } int hugetlb_vma_trylock_write(struct vm_area_struct *vma) { - struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - if (!__vma_shareable_lock(vma)) - return 1; + if (__vma_shareable_lock(vma)) { + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; - return down_write_trylock(&vma_lock->rw_sema); + return down_write_trylock(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + return down_write_trylock(&resv_map->rw_sema); + } + + return 1; } void hugetlb_vma_assert_locked(struct vm_area_struct *vma) @@ -313,6 +336,10 @@ void hugetlb_vma_assert_locked(struct vm struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; lockdep_assert_held(&vma_lock->rw_sema); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + lockdep_assert_held(&resv_map->rw_sema); } } @@ -345,6 +372,11 @@ static void __hugetlb_vma_unlock_write_f struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; __hugetlb_vma_unlock_write_put(vma_lock); + } else if (__vma_private_lock(vma)) { + struct resv_map *resv_map = vma_resv_map(vma); + + /* no free for anon vmas, but still need to unlock */ + up_write(&resv_map->rw_sema); } } @@ -1061,6 +1093,7 @@ struct resv_map *resv_map_alloc(void) kref_init(&resv_map->refs); spin_lock_init(&resv_map->lock); INIT_LIST_HEAD(&resv_map->regions); + init_rwsem(&resv_map->rw_sema); resv_map->adds_in_progress = 0; /* _ Patches currently in -mm which might be from riel(a)surriel.com are hugetlbfs-extend-hugetlb_vma_lock-to-private-vmas.patch hugetlbfs-close-race-between-madv_dontneed-and-page-fault.patch

2 years

1
0
0 0

[PATCH v2][6.5, 6.1, 5.15] iommu/arm-smmu-v3: Fix soft lockup triggered by arm_smmu_mm_invalidate_range

by Nicolin Chen

commit d5afb4b47e13161b3f33904d45110f9e6463bad6 upstream. When running an SVA case, the following soft lockup is triggered: -------------------------------------------------------------------- watchdog: BUG: soft lockup - CPU#244 stuck for 26s! pstate: 83400009 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : arm_smmu_cmdq_issue_cmdlist+0x178/0xa50 lr : arm_smmu_cmdq_issue_cmdlist+0x150/0xa50 sp : ffff8000d83ef290 x29: ffff8000d83ef290 x28: 000000003b9aca00 x27: 0000000000000000 x26: ffff8000d83ef3c0 x25: da86c0812194a0e8 x24: 0000000000000000 x23: 0000000000000040 x22: ffff8000d83ef340 x21: ffff0000c63980c0 x20: 0000000000000001 x19: ffff0000c6398080 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: ffff3000b4a3bbb0 x14: ffff3000b4a30888 x13: ffff3000b4a3cf60 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : ffffc08120e4d6bc x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000048cfa x5 : 0000000000000000 x4 : 0000000000000001 x3 : 000000000000000a x2 : 0000000080000000 x1 : 0000000000000000 x0 : 0000000000000001 Call trace: arm_smmu_cmdq_issue_cmdlist+0x178/0xa50 __arm_smmu_tlb_inv_range+0x118/0x254 arm_smmu_tlb_inv_range_asid+0x6c/0x130 arm_smmu_mm_invalidate_range+0xa0/0xa4 __mmu_notifier_invalidate_range_end+0x88/0x120 unmap_vmas+0x194/0x1e0 unmap_region+0xb4/0x144 do_mas_align_munmap+0x290/0x490 do_mas_munmap+0xbc/0x124 __vm_munmap+0xa8/0x19c __arm64_sys_munmap+0x28/0x50 invoke_syscall+0x78/0x11c el0_svc_common.constprop.0+0x58/0x1c0 do_el0_svc+0x34/0x60 el0_svc+0x2c/0xd4 el0t_64_sync_handler+0x114/0x140 el0t_64_sync+0x1a4/0x1a8 -------------------------------------------------------------------- The commit 06ff87bae8d3 ("arm64: mm: remove unused functions and variable protoypes") fixed a similar lockup on the CPU MMU side. Yet, it can occur to SMMU too since arm_smmu_mm_invalidate_range() is typically called next to MMU tlb flush function, e.g. tlb_flush_mmu_tlbonly { tlb_flush { __flush_tlb_range { // check MAX_TLBI_OPS } } mmu_notifier_invalidate_range { arm_smmu_mm_invalidate_range { // does not check MAX_TLBI_OPS } } } Clone a CMDQ_MAX_TLBI_OPS from the MAX_TLBI_OPS in tlbflush.h, since in an SVA case SMMU uses the CPU page table, so it makes sense to align with the tlbflush code. Then, replace per-page TLBI commands with a single per-asid TLBI command, if the request size hits this threshold. Signed-off-by: Nicolin Chen <nicolinc(a)nvidia.com> Link: https://lore.kernel.org/r/20230920052257.8615-1-nicolinc@nvidia.com Signed-off-by: Will Deacon <will(a)kernel.org> Cc: stable(a)vger.kernel.org --- Changelog v2 * Added missing "Cc: stable(a)vger.kernel.org" line v1 https://lore.kernel.org/stable/20231003233549.33678-1-nicolinc@nvidia.com .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index a5a63b1c947e..98d3ba7f9487 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -186,6 +186,15 @@ static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd) } } +/* + * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this + * is used as a threshold to replace per-page TLBI commands to issue in the + * command queue with an address-space TLBI command, when SMMU w/o a range + * invalidation feature handles too many per-page TLBI commands, which will + * otherwise result in a soft lockup. + */ +#define CMDQ_MAX_TLBI_OPS (1 << (PAGE_SHIFT - 3)) + static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) @@ -200,10 +209,22 @@ static void arm_smmu_mm_invalidate_range(struct mmu_notifier *mn, * range. So do a simple translation here by calculating size correctly. */ size = end - start; + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_RANGE_INV)) { + if (size >= CMDQ_MAX_TLBI_OPS * PAGE_SIZE) + size = 0; + } + + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) { + if (!size) + arm_smmu_tlb_inv_asid(smmu_domain->smmu, + smmu_mn->cd->asid); + else + arm_smmu_tlb_inv_range_asid(start, size, + smmu_mn->cd->asid, + PAGE_SIZE, false, + smmu_domain); + } - if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) - arm_smmu_tlb_inv_range_asid(start, size, smmu_mn->cd->asid, - PAGE_SIZE, false, smmu_domain); arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, start, size); } -- 2.42.0

2 years

1
0
0 0

[PATCH v21] PCI: Avoid D3 at suspend for AMD PCIe root ports w/ USB4 controllers

by Mario Limonciello

Iain reports that USB devices can't be used to wake a Lenovo Z13 from suspend. This occurs because on some AMD platforms, even though the Root Ports advertise PME_Support for D3hot and D3cold, they don't handle PME messages and generate wakeup interrupts from those states when amd-pmc has put the platform in a hardware sleep state. Iain reported this on an AMD Rembrandt platform, but it also affects Phoenix SoCs. On Iain's system, a USB4 router below the affected Root Port generates the PME. To avoid this issue, disable D3 for the root port associated with USB4 controllers at suspend time. Restore D3 support at resume so that it can be used by runtime suspend. The amd-pmc driver doesn't put the platform in a hardware sleep state for runtime suspend, so PMEs work as advertised. Cc: stable(a)vger.kernel.org # 6.1.y: 70b70a4: PCI/sysfs: Protect driver's D3cold preference from user space Cc: stable(a)vger.kernel.org # 6.5.y: 70b70a4: PCI/sysfs: Protect driver's D3cold preference from user space Cc: stable(a)vger.kernel.org # 6.6.y: 70b70a4: PCI/sysfs: Protect driver's D3cold preference from user space Link: https://learn.microsoft.com/en-us/windows-hardware/design/device-experience… [1] Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend") Reported-by: Iain Lane <iain(a)orangesquash.org.uk> Closes: https://forums.lenovo.com/t5/Ubuntu/Z13-can-t-resume-from-suspend-with-exte… Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com> --- v20-v21: * Rewrite commit message, lifting most of what Bjorn clipped down to on v20. * Use pci_d3cold_disable()/pci_d3cold_enable() instead * Do the quirk on the USB4 controller instead of RP->USB->RP --- drivers/pci/quirks.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index eeec1d6f9023..5674065011e7 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -6188,3 +6188,47 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9a31, dpc_log_size); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5020, of_pci_make_dev_node); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_XILINX, 0x5021, of_pci_make_dev_node); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT, 0x0005, of_pci_make_dev_node); + +#ifdef CONFIG_SUSPEND +/* + * Root Ports on some AMD SoCs advertise PME_Support for D3hot and D3cold, but + * if the SoC is put into a hardware sleep state by the amd-pmc driver, the + * Root Ports don't generate wakeup interrupts for USB devices. + * + * When suspending, disable D3 support for the Root Port so we don't use it. + * Restore D3 support when resuming. + */ +static void quirk_enable_rp_d3cold(struct pci_dev *dev) +{ + pci_d3cold_enable(pcie_find_root_port(dev)); +} + +static void quirk_disable_rp_d3cold_suspend(struct pci_dev *dev) +{ + struct pci_dev *rp; + + /* + * PM_SUSPEND_ON means we're doing runtime suspend, which means + * amd-pmc will not be involved so PMEs during D3 work as advertised. + * + * The PMEs *do* work if amd-pmc doesn't put the SoC in the hardware + * sleep state, but we assume amd-pmc is always present. + */ + if (pm_suspend_target_state == PM_SUSPEND_ON) + return; + + rp = pcie_find_root_port(dev); + pci_d3cold_disable(rp); + dev_info_once(&rp->dev, "quirk: disabling D3cold for suspend\n"); +} +/* Rembrandt (yellow_carp) */ +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x162e, quirk_disable_rp_d3cold_suspend); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x162e, quirk_enable_rp_d3cold); +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x162f, quirk_disable_rp_d3cold_suspend); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x162f, quirk_enable_rp_d3cold); +/* Phoenix (pink_sardine) */ +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x1668, quirk_disable_rp_d3cold_suspend); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1668, quirk_enable_rp_d3cold); +DECLARE_PCI_FIXUP_SUSPEND(PCI_VENDOR_ID_AMD, 0x1669, quirk_disable_rp_d3cold_suspend); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1669, quirk_enable_rp_d3cold); +#endif /* CONFIG_SUSPEND */ -- 2.34.1

2 years

4
12
0 0

[PATCH v2] ACPI: FPDT: properly handle invalid FPDT subtables

by Vasily Khoruzhick

Buggy BIOSes may have invalid FPDT subtables, e.g. on my hardware: S3PT subtable: 7F20FE30: 53 33 50 54 24 00 00 00-00 00 00 00 00 00 18 01 *S3PT$...........* 7F20FE40: 00 00 00 00 00 00 00 00-00 00 00 00 00 00 00 00 *................* 7F20FE50: 00 00 00 00 Here the first record has zero length. FBPT subtable: 7F20FE50: 46 42 50 54-3C 00 00 00 46 42 50 54 *....FBPT<...FBPT* 7F20FE60: 02 00 30 02 00 00 00 00-00 00 00 00 00 00 00 00 *..0.............* 7F20FE70: 2A A6 BC 6E 0B 00 00 00-1A 44 41 70 0B 00 00 00 **..n.....DAp....* 7F20FE80: 00 00 00 00 00 00 00 00-00 00 00 00 00 00 00 00 *................* And here FBPT table has FBPT signature repeated instead of the first record. Current code will be looping indefinitely due to zero length records, so break out of the loop if record length is zero. While we are here, add proper handling for fpdt_process_subtable() failures. Fixes: d1eb86e59be0 ("ACPI: tables: introduce support for FPDT table") Cc: stable(a)vger.kernel.org Signed-off-by: Vasily Khoruzhick <anarsoul(a)gmail.com> --- v2: return error from fpdt_process_subtable() if zero-length record is found and handle fpdt_process_subtable() failures drivers/acpi/acpi_fpdt.c | 42 ++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/acpi_fpdt.c b/drivers/acpi/acpi_fpdt.c index a2056c4c8cb7..c97c6e3936cc 100644 --- a/drivers/acpi/acpi_fpdt.c +++ b/drivers/acpi/acpi_fpdt.c @@ -194,12 +194,19 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) record_header = (void *)subtable_header + offset; offset += record_header->length; + if (!record_header->length) { + pr_err(FW_BUG "Zero-length record found.\n"); + result = -EINVAL; + goto err; + } + switch (record_header->type) { case RECORD_S3_RESUME: if (subtable_type != SUBTABLE_S3PT) { pr_err(FW_BUG "Invalid record %d for subtable %s\n", record_header->type, signature); - return -EINVAL; + result = -EINVAL; + goto err; } if (record_resume) { pr_err("Duplicate resume performance record found.\n"); @@ -208,7 +215,7 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) record_resume = (struct resume_performance_record *)record_header; result = sysfs_create_group(fpdt_kobj, &resume_attr_group); if (result) - return result; + goto err; break; case RECORD_S3_SUSPEND: if (subtable_type != SUBTABLE_S3PT) { @@ -223,13 +230,14 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) record_suspend = (struct suspend_performance_record *)record_header; result = sysfs_create_group(fpdt_kobj, &suspend_attr_group); if (result) - return result; + goto err; break; case RECORD_BOOT: if (subtable_type != SUBTABLE_FBPT) { pr_err(FW_BUG "Invalid %d for subtable %s\n", record_header->type, signature); - return -EINVAL; + result = -EINVAL; + goto err; } if (record_boot) { pr_err("Duplicate boot performance record found.\n"); @@ -238,7 +246,7 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) record_boot = (struct boot_performance_record *)record_header; result = sysfs_create_group(fpdt_kobj, &boot_attr_group); if (result) - return result; + goto err; break; default: @@ -247,6 +255,16 @@ static int fpdt_process_subtable(u64 address, u32 subtable_type) } } return 0; + +err: + if (record_boot) + sysfs_remove_group(fpdt_kobj, &boot_attr_group); + if (record_suspend) + sysfs_remove_group(fpdt_kobj, &suspend_attr_group); + if (record_resume) + sysfs_remove_group(fpdt_kobj, &resume_attr_group); + + return result; } static int __init acpi_init_fpdt(void) @@ -255,6 +273,7 @@ static int __init acpi_init_fpdt(void) struct acpi_table_header *header; struct fpdt_subtable_entry *subtable; u32 offset = sizeof(*header); + int result; status = acpi_get_table(ACPI_SIG_FPDT, 0, &header); @@ -263,8 +282,8 @@ static int __init acpi_init_fpdt(void) fpdt_kobj = kobject_create_and_add("fpdt", acpi_kobj); if (!fpdt_kobj) { - acpi_put_table(header); - return -ENOMEM; + result = -ENOMEM; + goto err_nomem; } while (offset < header->length) { @@ -272,8 +291,10 @@ static int __init acpi_init_fpdt(void) switch (subtable->type) { case SUBTABLE_FBPT: case SUBTABLE_S3PT: - fpdt_process_subtable(subtable->address, + result = fpdt_process_subtable(subtable->address, subtable->type); + if (result) + goto err_subtable; break; default: /* Other types are reserved in ACPI 6.4 spec. */ @@ -282,6 +303,11 @@ static int __init acpi_init_fpdt(void) offset += sizeof(*subtable); } return 0; +err_subtable: + kobject_put(fpdt_kobj); +err_nomem: + acpi_put_table(header); + return result; } fs_initcall(acpi_init_fpdt); -- 2.42.0

2 years

2
1
0 0

[PATCH v2 1/1] tee: amdtee: fix use-after-free vulnerability in amdtee_close_session

by Rijo Thomas

There is a potential race condition in amdtee_close_session that may cause use-after-free in amdtee_open_session. For instance, if a session has refcount == 1, and one thread tries to free this session via: kref_put(&sess->refcount, destroy_session); the reference count will get decremented, and the next step would be to call destroy_session(). However, if in another thread, amdtee_open_session() is called before destroy_session() has completed execution, alloc_session() may return 'sess' that will be freed up later in destroy_session() leading to use-after-free in amdtee_open_session. To fix this issue, treat decrement of sess->refcount and removal of 'sess' from session list in destroy_session() as a critical section, so that it is executed atomically. Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Cc: stable(a)vger.kernel.org Signed-off-by: Rijo Thomas <Rijo-john.Thomas(a)amd.com> --- v2: * Introduced kref_put_mutex() as suggested by Sumit Garg. drivers/tee/amdtee/core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index 372d64756ed6..3c15f6a9e91c 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -217,12 +217,12 @@ static int copy_ta_binary(struct tee_context *ctx, void *ptr, void **ta, return rc; } +/* mutex must be held by caller */ static void destroy_session(struct kref *ref) { struct amdtee_session *sess = container_of(ref, struct amdtee_session, refcount); - mutex_lock(&session_list_mutex); list_del(&sess->list_node); mutex_unlock(&session_list_mutex); kfree(sess); @@ -272,7 +272,8 @@ int amdtee_open_session(struct tee_context *ctx, if (arg->ret != TEEC_SUCCESS) { pr_err("open_session failed %d\n", arg->ret); handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); + kref_put_mutex(&sess->refcount, destroy_session, + &session_list_mutex); goto out; } @@ -290,7 +291,8 @@ int amdtee_open_session(struct tee_context *ctx, pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS); handle_close_session(ta_handle, session_info); handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); + kref_put_mutex(&sess->refcount, destroy_session, + &session_list_mutex); rc = -ENOMEM; goto out; } @@ -331,7 +333,7 @@ int amdtee_close_session(struct tee_context *ctx, u32 session) handle_close_session(ta_handle, session_info); handle_unload_ta(ta_handle); - kref_put(&sess->refcount, destroy_session); + kref_put_mutex(&sess->refcount, destroy_session, &session_list_mutex); return 0; } -- 2.25.1

2 years

3
2
0 0

[PATCH 0/2] [5.15, 5.10, 5,4] watchdog: iTCO: Backport of handle_boot_enabled=0 fix

by Jan Kiszka

This suggests a commit (and a follow-up fix for it) from 5.16+ for stable because it fixes the usage of watchdog.handle_boot_enabled=0 for iTCO, closing a monitoring gap in OTA update scenarios. These patches are applicable to and have been tested with 5.15, 5.10 and 5.4 stable heads. Jan Cc: Malin Jonsson <malin.jonsson(a)ericsson.com> Mika Westerberg (2): watchdog: iTCO_wdt: No need to stop the timer in probe watchdog: iTCO_wdt: Set NO_REBOOT if the watchdog is not already running drivers/watchdog/iTCO_wdt.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) -- 2.35.3

2 years

2
3
0 0

Backport commit dad651b2a44e ("nvme-pci: do not set the NUMA node of device if it has none")

by Pratyush Yadav

Hi, Please backport commit dad651b2a44e ("nvme-pci: do not set the NUMA node of device if it has none") to stable branches 6.1, 5.15, 5.10, 5.4, 4.19, and 4.14. The commit fixes a4aea5623d4a ("NVMe: Convert to blk-mq") which was introduced in v3.19 but I forgot to add this when sending the patch. -- Regards, Pratyush Yadav Amazon Development Center Germany GmbH Krausenstr. 38 10117 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B Sitz: Berlin Ust-ID: DE 289 237 879

2 years

2
1
0 0

[PATCH 7/7] ASoC: codecs: wcd938x-sdw: fix runtime PM imbalance on probe errors

by Johan Hovold

Make sure to balance the runtime PM operations, including the disable count, on probe errors and on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable(a)vger.kernel.org # 5.14 Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- sound/soc/codecs/wcd938x-sdw.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index 1baea04480e2..a1f04010da95 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1278,7 +1278,18 @@ static int wcd9380_probe(struct sdw_slave *pdev, pm_runtime_set_active(dev); pm_runtime_enable(dev); - return component_add(dev, &wcd938x_sdw_component_ops); + ret = component_add(dev, &wcd938x_sdw_component_ops); + if (ret) + goto err_disable_rpm; + + return 0; + +err_disable_rpm: + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); + + return ret; } static int wcd9380_remove(struct sdw_slave *pdev) @@ -1287,6 +1298,10 @@ static int wcd9380_remove(struct sdw_slave *pdev) component_del(dev, &wcd938x_sdw_component_ops); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); + return 0; } -- 2.41.0

2 years

1
0
0 0

[PATCH 6/7] ASoC: codecs: wcd938x-sdw: fix use after free on driver unbind

by Johan Hovold

Make sure to deregister the component when the driver is being unbound and before the underlying device-managed resources are freed. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable(a)vger.kernel.org # 5.14 Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- sound/soc/codecs/wcd938x-sdw.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index 6951120057e5..1baea04480e2 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1281,6 +1281,15 @@ static int wcd9380_probe(struct sdw_slave *pdev, return component_add(dev, &wcd938x_sdw_component_ops); } +static int wcd9380_remove(struct sdw_slave *pdev) +{ + struct device *dev = &pdev->dev; + + component_del(dev, &wcd938x_sdw_component_ops); + + return 0; +} + static const struct sdw_device_id wcd9380_slave_id[] = { SDW_SLAVE_ENTRY(0x0217, 0x10d, 0), {}, @@ -1320,6 +1329,7 @@ static const struct dev_pm_ops wcd938x_sdw_pm_ops = { static struct sdw_driver wcd9380_codec_driver = { .probe = wcd9380_probe, + .remove = wcd9380_remove, .ops = &wcd9380_slave_ops, .id_table = wcd9380_slave_id, .driver = { -- 2.41.0

2 years

1
0
0 0

[PATCH 5/7] ASoC: codecs: wcd938x: fix runtime PM imbalance on remove

by Johan Hovold

Make sure to balance the runtime PM operations, including the disable count, on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable(a)vger.kernel.org # 5.14 Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- sound/soc/codecs/wcd938x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 679c627f7eaa..d27b919c63b4 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3620,9 +3620,15 @@ static int wcd938x_probe(struct platform_device *pdev) static void wcd938x_remove(struct platform_device *pdev) { - struct wcd938x_priv *wcd938x = dev_get_drvdata(&pdev->dev); + struct device *dev = &pdev->dev; + struct wcd938x_priv *wcd938x = dev_get_drvdata(dev); + + component_master_del(dev, &wcd938x_comp_ops); + + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); - component_master_del(&pdev->dev, &wcd938x_comp_ops); regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); } -- 2.41.0

2 years

1
0
0 0

[PATCH 4/7] ASoC: codecs: wcd938x: fix regulator leaks on probe errors

by Johan Hovold

Make sure to disable and free the regulators on probe errors and on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable(a)vger.kernel.org # 5.14 Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- sound/soc/codecs/wcd938x.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 7e0b07eeed77..679c627f7eaa 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3325,8 +3325,10 @@ static int wcd938x_populate_dt_data(struct wcd938x_priv *wcd938x, struct device return dev_err_probe(dev, ret, "Failed to get supplies\n"); ret = regulator_bulk_enable(WCD938X_MAX_SUPPLY, wcd938x->supplies); - if (ret) + if (ret) { + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); return dev_err_probe(dev, ret, "Failed to enable supplies\n"); + } wcd938x_dt_parse_micbias_info(dev, wcd938x); @@ -3592,13 +3594,13 @@ static int wcd938x_probe(struct platform_device *pdev) ret = wcd938x_add_slave_components(wcd938x, dev, &match); if (ret) - return ret; + goto err_disable_regulators; wcd938x_reset(wcd938x); ret = component_master_add_with_match(dev, &wcd938x_comp_ops, match); if (ret) - return ret; + goto err_disable_regulators; pm_runtime_set_autosuspend_delay(dev, 1000); pm_runtime_use_autosuspend(dev); @@ -3608,11 +3610,21 @@ static int wcd938x_probe(struct platform_device *pdev) pm_runtime_idle(dev); return 0; + +err_disable_regulators: + regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); + + return ret; } static void wcd938x_remove(struct platform_device *pdev) { + struct wcd938x_priv *wcd938x = dev_get_drvdata(&pdev->dev); + component_master_del(&pdev->dev, &wcd938x_comp_ops); + regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); } #if defined(CONFIG_OF) -- 2.41.0

2 years

1
0
0 0

[PATCH 3/7] ASoC: codecs: wcd938x: fix resource leaks on bind errors

by Johan Hovold

Add the missing code to release resources on bind errors, including the references taken by wcd938x_sdw_device_get() which also need to be dropped on unbind(). Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable(a)vger.kernel.org # 5.14 Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- sound/soc/codecs/wcd938x.c | 44 +++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index c617fc3ce489..7e0b07eeed77 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3435,7 +3435,8 @@ static int wcd938x_bind(struct device *dev) wcd938x->rxdev = wcd938x_sdw_device_get(wcd938x->rxnode); if (!wcd938x->rxdev) { dev_err(dev, "could not find slave with matching of node\n"); - return -EINVAL; + ret = -EINVAL; + goto err_unbind; } wcd938x->sdw_priv[AIF1_PB] = dev_get_drvdata(wcd938x->rxdev); wcd938x->sdw_priv[AIF1_PB]->wcd938x = wcd938x; @@ -3443,7 +3444,8 @@ static int wcd938x_bind(struct device *dev) wcd938x->txdev = wcd938x_sdw_device_get(wcd938x->txnode); if (!wcd938x->txdev) { dev_err(dev, "could not find txslave with matching of node\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_rxdev; } wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; @@ -3454,31 +3456,35 @@ static int wcd938x_bind(struct device *dev) if (!device_link_add(wcd938x->rxdev, wcd938x->txdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink tx and rx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_txdev; } if (!device_link_add(dev, wcd938x->txdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink wcd and tx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_rxtx_link; } if (!device_link_add(dev, wcd938x->rxdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink wcd and rx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_tx_link; } wcd938x->regmap = dev_get_regmap(&wcd938x->tx_sdw_dev->dev, NULL); if (!wcd938x->regmap) { dev_err(dev, "could not get TX device regmap\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_rx_link; } ret = wcd938x_irq_init(wcd938x, dev); if (ret) { dev_err(dev, "%s: IRQ init failed: %d\n", __func__, ret); - return ret; + goto err_remove_rx_link; } wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq; @@ -3487,17 +3493,33 @@ static int wcd938x_bind(struct device *dev) ret = wcd938x_set_micbias_data(wcd938x); if (ret < 0) { dev_err(dev, "%s: bad micbias pdata\n", __func__); - return ret; + goto err_remove_rx_link; } ret = snd_soc_register_component(dev, &soc_codec_dev_wcd938x, wcd938x_dais, ARRAY_SIZE(wcd938x_dais)); - if (ret) + if (ret) { dev_err(dev, "%s: Codec registration failed\n", __func__); + goto err_remove_rx_link; + } - return ret; + return 0; +err_remove_rx_link: + device_link_remove(dev, wcd938x->rxdev); +err_remove_tx_link: + device_link_remove(dev, wcd938x->txdev); +err_remove_rxtx_link: + device_link_remove(wcd938x->rxdev, wcd938x->txdev); +err_put_txdev: + put_device(wcd938x->txdev); +err_put_rxdev: + put_device(wcd938x->rxdev); +err_unbind: + component_unbind_all(dev, wcd938x); + + return ret; } static void wcd938x_unbind(struct device *dev) @@ -3508,6 +3530,8 @@ static void wcd938x_unbind(struct device *dev) device_link_remove(dev, wcd938x->txdev); device_link_remove(dev, wcd938x->rxdev); device_link_remove(wcd938x->rxdev, wcd938x->txdev); + put_device(wcd938x->txdev); + put_device(wcd938x->rxdev); component_unbind_all(dev, wcd938x); } -- 2.41.0

2 years

1
0
0 0

[PATCH 2/7] ASoC: codecs: wcd938x: fix unbind tear down order

by Johan Hovold

Make sure to deregister the component before tearing down the resources it depends on during unbind(). Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable(a)vger.kernel.org # 5.14 Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index cf1eaf678fc2..c617fc3ce489 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3504,10 +3504,10 @@ static void wcd938x_unbind(struct device *dev) { struct wcd938x_priv *wcd938x = dev_get_drvdata(dev); + snd_soc_unregister_component(dev); device_link_remove(dev, wcd938x->txdev); device_link_remove(dev, wcd938x->rxdev); device_link_remove(wcd938x->rxdev, wcd938x->txdev); - snd_soc_unregister_component(dev); component_unbind_all(dev, wcd938x); } -- 2.41.0

2 years

1
0
0 0

[PATCH 1/7] ASoC: codecs: wcd938x: drop bogus bind error handling

by Johan Hovold

Drop the bogus error handling for a soundwire device backcast during bind() that cannot fail. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable(a)vger.kernel.org # 5.14 Cc: Srinivas Kandagatla <srinivas.kandagatla(a)linaro.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- sound/soc/codecs/wcd938x.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index a3c680661377..cf1eaf678fc2 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3448,10 +3448,6 @@ static int wcd938x_bind(struct device *dev) wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; wcd938x->tx_sdw_dev = dev_to_sdw_dev(wcd938x->txdev); - if (!wcd938x->tx_sdw_dev) { - dev_err(dev, "could not get txslave with matching of dev\n"); - return -EINVAL; - } /* As TX is main CSR reg interface, which should not be suspended first. * expicilty add the dependency link */ -- 2.41.0

2 years

1
0
0 0

[PATCH 2/5] mfd: qcom-spmi-pmic: fix revid implementation

by Johan Hovold

The Qualcomm SPMI PMIC revid implementation is broken in multiple ways. First, it assumes that just because the sibling base device has been registered that means that it is also bound to a driver, which may not be the case (e.g. due to probe deferral or asynchronous probe). This could trigger a NULL-pointer dereference when attempting to access the driver data of the unbound device. Second, it accesses driver data of a sibling device directly and without any locking, which means that the driver data may be freed while it is being accessed (e.g. on driver unbind). Third, it leaks a struct device reference to the sibling device which is looked up using the spmi_device_from_of() every time a function (child) device is calling the revid function (e.g. on probe). Fix this mess by reimplementing the revid lookup so that it is done only at probe of the PMIC device; the base device fetches the revid info from the hardware, while any secondary SPMI device fetches the information from the base device and caches it so that it can be accessed safely from its children. If the base device has not been probed yet then probe of a secondary device is deferred. Fixes: e9c11c6e3a0e ("mfd: qcom-spmi-pmic: expose the PMIC revid information to clients") Cc: stable(a)vger.kernel.org # 6.0 Cc: Caleb Connolly <caleb.connolly(a)linaro.org> Cc: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- drivers/mfd/qcom-spmi-pmic.c | 69 +++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/drivers/mfd/qcom-spmi-pmic.c b/drivers/mfd/qcom-spmi-pmic.c index 47738f7e492c..8e449cff5cec 100644 --- a/drivers/mfd/qcom-spmi-pmic.c +++ b/drivers/mfd/qcom-spmi-pmic.c @@ -30,6 +30,8 @@ struct qcom_spmi_dev { struct qcom_spmi_pmic pmic; }; +static DEFINE_MUTEX(pmic_spmi_revid_lock); + #define N_USIDS(n) ((void *)n) static const struct of_device_id pmic_spmi_id_table[] = { @@ -76,24 +78,21 @@ static const struct of_device_id pmic_spmi_id_table[] = { * * This only supports PMICs with 1 or 2 USIDs. */ -static struct spmi_device *qcom_pmic_get_base_usid(struct device *dev) +static struct spmi_device *qcom_pmic_get_base_usid(struct spmi_device *sdev, struct qcom_spmi_dev *ctx) { - struct spmi_device *sdev; - struct qcom_spmi_dev *ctx; struct device_node *spmi_bus; struct device_node *child; int function_parent_usid, ret; u32 pmic_addr; - sdev = to_spmi_device(dev); - ctx = dev_get_drvdata(&sdev->dev); - /* * Quick return if the function device is already in the base * USID. This will always be hit for PMICs with only 1 USID. */ - if (sdev->usid % ctx->num_usids == 0) + if (sdev->usid % ctx->num_usids == 0) { + get_device(&sdev->dev); return sdev; + } function_parent_usid = sdev->usid; @@ -118,10 +117,8 @@ static struct spmi_device *qcom_pmic_get_base_usid(struct device *dev) sdev = spmi_device_from_of(child); if (!sdev) { /* - * If the base USID for this PMIC hasn't probed yet - * but the secondary USID has, then we need to defer - * the function driver so that it will attempt to - * probe again when the base USID is ready. + * If the base USID for this PMIC hasn't been + * registered yet then we need to defer. */ sdev = ERR_PTR(-EPROBE_DEFER); } @@ -135,6 +132,35 @@ static struct spmi_device *qcom_pmic_get_base_usid(struct device *dev) return sdev; } +static int pmic_spmi_get_base_revid(struct spmi_device *sdev, struct qcom_spmi_dev *ctx) +{ + struct qcom_spmi_dev *base_ctx; + struct spmi_device *base; + int ret = 0; + + base = qcom_pmic_get_base_usid(sdev, ctx); + if (IS_ERR(base)) + return PTR_ERR(base); + + /* + * Copy revid info from base device if it has probed and is still + * bound to its driver. + */ + mutex_lock(&pmic_spmi_revid_lock); + base_ctx = spmi_device_get_drvdata(base); + if (!base_ctx) { + ret = -EPROBE_DEFER; + goto out_unlock; + } + memcpy(&ctx->pmic, &base_ctx->pmic, sizeof(ctx->pmic)); +out_unlock: + mutex_unlock(&pmic_spmi_revid_lock); + + put_device(&base->dev); + + return ret; +} + static int pmic_spmi_load_revid(struct regmap *map, struct device *dev, struct qcom_spmi_pmic *pmic) { @@ -210,11 +236,7 @@ const struct qcom_spmi_pmic *qcom_pmic_get(struct device *dev) if (!of_match_device(pmic_spmi_id_table, dev->parent)) return ERR_PTR(-EINVAL); - sdev = qcom_pmic_get_base_usid(dev->parent); - - if (IS_ERR(sdev)) - return ERR_CAST(sdev); - + sdev = to_spmi_device(dev->parent); spmi = dev_get_drvdata(&sdev->dev); return &spmi->pmic; @@ -249,16 +271,31 @@ static int pmic_spmi_probe(struct spmi_device *sdev) ret = pmic_spmi_load_revid(regmap, &sdev->dev, &ctx->pmic); if (ret < 0) return ret; + } else { + ret = pmic_spmi_get_base_revid(sdev, ctx); + if (ret) + return ret; } + + mutex_lock(&pmic_spmi_revid_lock); spmi_device_set_drvdata(sdev, ctx); + mutex_unlock(&pmic_spmi_revid_lock); return devm_of_platform_populate(&sdev->dev); } +static void pmic_spmi_remove(struct spmi_device *sdev) +{ + mutex_lock(&pmic_spmi_revid_lock); + spmi_device_set_drvdata(sdev, NULL); + mutex_unlock(&pmic_spmi_revid_lock); +} + MODULE_DEVICE_TABLE(of, pmic_spmi_id_table); static struct spmi_driver pmic_spmi_driver = { .probe = pmic_spmi_probe, + .remove = pmic_spmi_remove, .driver = { .name = "pmic-spmi", .of_match_table = pmic_spmi_id_table, -- 2.41.0

2 years

1
0
0 0

[PATCH 1/5] mfd: qcom-spmi-pmic: fix reference leaks in revid helper

by Johan Hovold

The Qualcomm SPMI PMIC revid implementation is broken in multiple ways. First, it totally ignores struct device_node reference counting and leaks references to the parent bus node as well as each child it iterates over using an open-coded for_each_child_of_node(). Second, it leaks references to each spmi device on the bus that it iterates over by failing to drop the reference taken by the spmi_device_from_of() helper. Fix the struct device_node leaks by reimplementing the lookup using for_each_child_of_node() and adding the missing reference count decrements. Fix the sibling struct device leaks by dropping the unnecessary lookups of devices with the wrong USID. Note that this still leaves one struct device reference leak in case a base device is found but it is not the parent of the device used for the lookup. This will be addressed in a follow-on patch. Fixes: e9c11c6e3a0e ("mfd: qcom-spmi-pmic: expose the PMIC revid information to clients") Cc: stable(a)vger.kernel.org # 6.0 Cc: Caleb Connolly <caleb.connolly(a)linaro.org> Cc: Dmitry Baryshkov <dmitry.baryshkov(a)linaro.org> Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org> --- drivers/mfd/qcom-spmi-pmic.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/mfd/qcom-spmi-pmic.c b/drivers/mfd/qcom-spmi-pmic.c index 7e2cd79d17eb..47738f7e492c 100644 --- a/drivers/mfd/qcom-spmi-pmic.c +++ b/drivers/mfd/qcom-spmi-pmic.c @@ -81,7 +81,7 @@ static struct spmi_device *qcom_pmic_get_base_usid(struct device *dev) struct spmi_device *sdev; struct qcom_spmi_dev *ctx; struct device_node *spmi_bus; - struct device_node *other_usid = NULL; + struct device_node *child; int function_parent_usid, ret; u32 pmic_addr; @@ -105,28 +105,34 @@ static struct spmi_device *qcom_pmic_get_base_usid(struct device *dev) * device for USID 2. */ spmi_bus = of_get_parent(sdev->dev.of_node); - do { - other_usid = of_get_next_child(spmi_bus, other_usid); - - ret = of_property_read_u32_index(other_usid, "reg", 0, &pmic_addr); - if (ret) - return ERR_PTR(ret); + sdev = ERR_PTR(-ENODATA); + for_each_child_of_node(spmi_bus, child) { + ret = of_property_read_u32_index(child, "reg", 0, &pmic_addr); + if (ret) { + of_node_put(child); + sdev = ERR_PTR(ret); + break; + } - sdev = spmi_device_from_of(other_usid); if (pmic_addr == function_parent_usid - (ctx->num_usids - 1)) { - if (!sdev) + sdev = spmi_device_from_of(child); + if (!sdev) { /* * If the base USID for this PMIC hasn't probed yet * but the secondary USID has, then we need to defer * the function driver so that it will attempt to * probe again when the base USID is ready. */ - return ERR_PTR(-EPROBE_DEFER); - return sdev; + sdev = ERR_PTR(-EPROBE_DEFER); + } + of_node_put(child); + break; } - } while (other_usid->sibling); + } + + of_node_put(spmi_bus); - return ERR_PTR(-ENODATA); + return sdev; } static int pmic_spmi_load_revid(struct regmap *map, struct device *dev, -- 2.41.0

2 years

1
0
0 0

[PATCH 6.5] ASoC: cs35l56: Call pm_runtime_dont_use_autosuspend()

by Richard Fitzgerald

commit ec03804552e9a723569e14d2512f36a8e70dc640 upstream Driver remove() must call pm_runtime_dont_use_autosuspend(). Drivers that call pm_runtime_use_autosuspend() must disable it in driver remove(). Unfortunately until recently this was only mentioned in 1 line in a 900+ line document so most people hadn't noticed this. It has only recently been added to the kerneldoc of pm_runtime_use_autosuspend(). Backport note: This is the same change as the upstream commit but the cs35l56->base.dev argument in the upstream code is cs35l56->dev in older releases. Signed-off-by: Richard Fitzgerald <rf(a)opensource.cirrus.com> Link: https://lore.kernel.org/r/20230908101716.2658582-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown <broonie(a)kernel.org> --- Please apply to 6.5. --- sound/soc/codecs/cs35l56.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index fd06b9f9d496..7e241908b5f1 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1594,6 +1594,7 @@ void cs35l56_remove(struct cs35l56_private *cs35l56) flush_workqueue(cs35l56->dsp_wq); destroy_workqueue(cs35l56->dsp_wq); + pm_runtime_dont_use_autosuspend(cs35l56->dev); pm_runtime_suspend(cs35l56->dev); pm_runtime_disable(cs35l56->dev); -- 2.30.2

2 years

2
1
0 0

Re: [PATCH 0/5] MediaTek: Update MAINTAINERS and dts fixes for v6.7

by AngeloGioacchino Del Regno

Il 03/10/23 14:45, Sasha Levin ha scritto: > On Tue, Oct 03, 2023 at 01:13:42PM +0200, AngeloGioacchino Del Regno wrote: >> Hello SoC maintainers, >> >> As suggested by Arnd in [1], I am sending an update to the MAINTAINERS >> file, adding myself to the MediaTek SoC maintainers, and some urgent >> bugfixes for MediaTek devicetrees. > > Just curious: if it's urgent, and there are patches cc'ed to stable, why > wait for 6.7 rather than one of the 6.6 -rc cycles? > I may have misunderstood what Arnd said - in that case, I'm sorry. As for the fixes, I would be happier if those could land in v6.6-rcX. Regards, Angelo

2 years

1
0
0 0

Re: [PATCH 5.10] btrfs: fix region size in count_bitmap_extents

by Анастасия Любимова

03/10/23 15:39, Anastasia Belova пишет: > > 14/09/23 12:45, Anastasia Belova пишет: >> count_bitmap_extents was deleted in version 5.11, but >> there is possible mistake in versions 5.6-5.10. >> >> Region size should be calculated by subtracting >> the end from the beginning. > Do I understand correctly that bytes should decrease and > (rs - re) is always negative and should be replaced by (re - rs)? >> Found by Linux Verification Center (linuxtesting.org) with SVACE. >> >> Fixes: dfb79ddb130e ("btrfs: track discardable extents for async discard") >> Signed-off-by: Anastasia Belova<abelova(a)astralinux.ru> >> --- >> fs/btrfs/free-space-cache.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c >> index 4989c60b1df9..a34e266a0969 100644 >> --- a/fs/btrfs/free-space-cache.c >> +++ b/fs/btrfs/free-space-cache.c >> @@ -1930,7 +1930,7 @@ static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl, >> >> bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0, >> BITS_PER_BITMAP) { >> - bytes -= (rs - re) * ctl->unit; >> + bytes -= (re - rs) * ctl->unit; >> count++; >> >> if (!bytes) > Anastasia Sorry for sending html accidentally

2 years

1
0
0 0

[PATCH 0/5] MediaTek: Update MAINTAINERS and dts fixes for v6.7

by AngeloGioacchino Del Regno

Hello SoC maintainers, As suggested by Arnd in [1], I am sending an update to the MAINTAINERS file, adding myself to the MediaTek SoC maintainers, and some urgent bugfixes for MediaTek devicetrees. mt6358 PMIC - Bogus property dropped mt7622/mt7986a - Fix dtbs_check for t-phy nodes mt8195: - Fix early boot failure when DSU PMU enabled - Fix memory layout for demo board Regards, Angelo --- AngeloGioacchino Del Regno (1): MAINTAINERS: Add Angelo as MediaTek SoC co-maintainer Eugen Hristev (1): arm64: dts: mediatek: fix t-phy unit name Macpaul Lin (2): arm64: dts: mediatek: mt8195-demo: fix the memory size to 8GB arm64: dts: mediatek: mt8195-demo: update and reorder reserved memory regions Nícolas F. R. A. Prado (1): arm64: dts: mediatek: mt8195: Set DSU PMU status to fail MAINTAINERS | 2 +- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 2 +- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 2 +- arch/arm64/boot/dts/mediatek/mt8195-demo.dts | 39 ++++++++++++++++++++++------ arch/arm64/boot/dts/mediatek/mt8195.dtsi | 1 + 5 files changed, 35 insertions(+), 11 deletions(-) --- base-commit: 0bb80ecc33a8fb5a682236443c1e740d5c917d1d change-id: 20231003-mediatek-fixes-v6-7-341b5cfdc07a Best regards, -- AngeloGioacchino Del Regno <angelogioacchino.delregno(a)collabora.com>

2 years

1
3
0 0

[PATCH 5.10 0/4] cgroup: Fix suspicious rcu_dereference_check() warning

by ovidiu.panait＠windriver.com

From: Ovidiu Panait <ovidiu.panait(a)windriver.com> When booting the 5.10-stable kernel on the zcu102 board with CONFIG_PROVE_RCU=y, the following warning is present: ============================= WARNING: suspicious RCU usage 5.10.194-yocto-standard #1 Not tainted ----------------------------- include/linux/cgroup.h:495 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 9 locks held by kworker/2:2/106: #0: ffffff8800048948 ((wq_completion)events){..}-{0:0}, at: process_one_work+0x1f8/0x634 #1: ffffffc014c4bda8 (deferred_probe_work){..}-{0:0}, at: process_one_work+0x1f8/0x634 #2: ffffff880005d9a0 (&dev->mutex){....}-{3:3}, at: __device_attach+0x40/0x1c0 #3: ffffffc011c70cb0 (cpu_hotplug_lock){++++}-{0:0}, at: cpus_read_lock+0x18/0x24 #4: ffffff8800b10928 (subsys mutex#5){..}-{3:3}, at: subsys_interface_register+0x58/0x120 #5: ffffff8805e78c00 (&policy->rwsem){..}-{3:3}, at: cpufreq_online+0x590/0x960 #6: ffffffc012a9e770 (cpuset_mutex){..}-{3:3}, at: cpuset_lock+0x24/0x30 #7: ffffff880567bd80 (&p->pi_lock){..}-{2:2}, at: task_rq_lock+0x44/0xf0 #8: ffffff887aff50d8 (&rq->lock){..}-{2:2}, at: task_rq_lock+0x5c/0xf0 stack backtrace: CPU: 2 PID: 106 Comm: kworker/2:2 Not tainted 5.10.194-yocto-standard #1 Hardware name: ZynqMP ZCU102 Rev1.0 (DT) Workqueue: events deferred_probe_work_func Call trace: dump_backtrace+0x0/0x1a4 show_stack+0x20/0x2c dump_stack+0xf0/0x13c lockdep_rcu_suspicious+0xe4/0xf8 inc_dl_tasks_cs+0xb8/0xbc switched_to_dl+0x38/0x280 __sched_setscheduler+0x204/0x860 sched_setattr_nocheck+0x20/0x30 sugov_init+0x1b8/0x380 cpufreq_init_governor.part.0+0x60/0xe0 cpufreq_set_policy+0x1d0/0x33c cpufreq_online+0x35c/0x960 cpufreq_add_dev+0x8c/0xa0 subsys_interface_register+0x10c/0x120 cpufreq_register_driver+0x148/0x2a4 dt_cpufreq_probe+0x288/0x3d0 platform_drv_probe+0x5c/0xb0 really_probe+0xe0/0x4ac driver_probe_device+0x60/0xf4 __device_attach_driver+0xc0/0x12c bus_for_each_drv+0x80/0xe0 __device_attach+0xb0/0x1c0 device_initial_probe+0x1c/0x30 bus_probe_device+0xa8/0xb0 deferred_probe_work_func+0x94/0xd0 process_one_work+0x2b8/0x634 worker_thread+0x7c/0x474 kthread+0x154/0x160 ret_from_fork+0x10/0x34 The warning was introduced in v5.10.193 by commit: 5ac05ce56843 "(sched/cpuset: Keep track of SCHED_DEADLINE task in cpusets)" This issue was also reported for 5.15 here: https://lore.kernel.org/lkml/CA+G9fYv9xTu4bKJGy=e=KZSG5pZ+tJAmfZr=0dbuKNs=9… Backport commit f2aa197e4794 ("cgroup: Fix suspicious rcu_dereference_check() usage warning") and its dependencies to get rid of this warning. Andrey Ryabinin (1): sched/cpuacct: Fix user/system in shown cpuacct.usage* Chengming Zhou (3): sched/cpuacct: Fix charge percpu cpuusage sched/cpuacct: Optimize away RCU read lock cgroup: Fix suspicious rcu_dereference_check() usage warning include/linux/cgroup.h | 3 +- kernel/sched/cpuacct.c | 84 +++++++++++++++++------------------------- 2 files changed, 35 insertions(+), 52 deletions(-) -- 2.31.1

2 years

2
5
0 0

[PATCH 1/4] megaraid_sas: Increase register read retry rount from 3 to 30 for selected registers

by Chandrakanth patil

In BMC environments with concurrent access to multiple registers, certain registers occasionally yield a value of 0 even after 3 retries due to hardware errata. As a fix, we have extended the retry count from 3 to 30. The same errata applies to the mpt3sas driver, and a similar patch has been accepted. Please find more details in the mpt3sas patch reference link. Link: https://lore.kernel.org/r/20230829090020.5417-2-ranjan.kumar@broadcom.com Fixes: 272652fcbf1a ("scsi: megaraid_sas: add retry logic in megasas_readl") Cc: stable(a)vger.kernel.org Signed-off-by: Chandrakanth patil <chandrakanth.patil(a)broadcom.com> Signed-off-by: Sumit Saxena <sumit.saxena(a)broadcom.com> --- drivers/scsi/megaraid/megaraid_sas_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index e1aa667dae66..3d4f13da1ae8 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -263,13 +263,13 @@ u32 megasas_readl(struct megasas_instance *instance, * Fusion registers could intermittently return all zeroes. * This behavior is transient in nature and subsequent reads will * return valid value. As a workaround in driver, retry readl for - * upto three times until a non-zero value is read. + * up to thirty times until a non-zero value is read. */ if (instance->adapter_type == AERO_SERIES) { do { ret_val = readl(addr); i++; - } while (ret_val == 0 && i < 3); + } while (ret_val == 0 && i < 30); return ret_val; } else { return readl(addr); -- 2.39.3

2 years

1
0
0 0

[PATCH net,v2, 1/3] net: mana: Fix TX CQE error handling

by Haiyang Zhang

For an unknown TX CQE error type (probably from a newer hardware), still free the SKB, update the queue tail, etc., otherwise the accounting will be wrong. Also, TX errors can be triggered by injecting corrupted packets, so replace the WARN_ONCE to ratelimited error logging. Cc: stable(a)vger.kernel.org Fixes: ca9c54d2d6a5 ("net: mana: Add a driver for Microsoft Azure Network Adapter (MANA)") Signed-off-by: Haiyang Zhang <haiyangz(a)microsoft.com> Reviewed-by: Simon Horman <horms(a)kernel.org> --- drivers/net/ethernet/microsoft/mana/mana_en.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 4a16ebff3d1d..5cdcf7561b38 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1317,19 +1317,23 @@ static void mana_poll_tx_cq(struct mana_cq *cq) case CQE_TX_VPORT_IDX_OUT_OF_RANGE: case CQE_TX_VPORT_DISABLED: case CQE_TX_VLAN_TAGGING_VIOLATION: - WARN_ONCE(1, "TX: CQE error %d: ignored.\n", - cqe_oob->cqe_hdr.cqe_type); + if (net_ratelimit()) + netdev_err(ndev, "TX: CQE error %d\n", + cqe_oob->cqe_hdr.cqe_type); + apc->eth_stats.tx_cqe_err++; break; default: - /* If the CQE type is unexpected, log an error, assert, - * and go through the error path. + /* If the CQE type is unknown, log an error, + * and still free the SKB, update tail, etc. */ - WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n", - cqe_oob->cqe_hdr.cqe_type); + if (net_ratelimit()) + netdev_err(ndev, "TX: unknown CQE type %d\n", + cqe_oob->cqe_hdr.cqe_type); + apc->eth_stats.tx_cqe_unknown_type++; - return; + break; } if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num)) -- 2.25.1

2 years

2
1
0 0

[PATCH net,v2, 2/3] net: mana: Fix the tso_bytes calculation

by Haiyang Zhang

sizeof(struct hop_jumbo_hdr) is not part of tso_bytes, so remove the subtraction from header size. Cc: stable(a)vger.kernel.org Fixes: bd7fc6e1957c ("net: mana: Add new MANA VF performance counters for easier troubleshooting") Signed-off-by: Haiyang Zhang <haiyangz(a)microsoft.com> Reviewed-by: Simon Horman <horms(a)kernel.org> --- drivers/net/ethernet/microsoft/mana/mana_en.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 5cdcf7561b38..86e724c3eb89 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -264,8 +264,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) ihs = skb_transport_offset(skb) + sizeof(struct udphdr); } else { ihs = skb_tcp_all_headers(skb); - if (ipv6_has_hopopt_jumbo(skb)) - ihs -= sizeof(struct hop_jumbo_hdr); } u64_stats_update_begin(&tx_stats->syncp); -- 2.25.1

2 years

2
1
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror