From: Keith Busch kbusch@kernel.org
Provide a helper to remove elements from a list to the end, and place those elements in a new list.
Signed-off-by: Keith Busch kbusch@kernel.org --- include/linux/list.h | 20 ++++++++++++++++++++ lib/list-test.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+)
diff --git a/include/linux/list.h b/include/linux/list.h index 5f4b0a39cf46a..f22850e854820 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -520,6 +520,26 @@ static inline void list_cut_before(struct list_head *list, entry->prev = head; }
+/** + * list_cut - cut a list into two from the entry + * @list: a new list to add all removed entries + * @head: a list with entries + * @entry: an entry within head, could be the head itself + * + * This helper removes elements from @head starting at @entry until the end, + * and appends them to @lists. + */ +static inline void list_cut(struct list_head *list, + struct list_head *head, struct list_head *entry) +{ + list->next = entry; + list->prev = head->prev; + head->prev = entry->prev; + entry->prev->next = head; + entry->prev = list; + list->prev->next = list; +} + static inline void __list_splice(const struct list_head *list, struct list_head *prev, struct list_head *next) diff --git a/lib/list-test.c b/lib/list-test.c index 0cc27de9cec88..1507f46cf1ade 100644 --- a/lib/list-test.c +++ b/lib/list-test.c @@ -382,6 +382,34 @@ static void list_test_list_is_singular(struct kunit *test) KUNIT_EXPECT_FALSE(test, list_is_singular(&list)); }
+static void list_test_list_cut(struct kunit *test) +{ + struct list_head entries[3], *cur; + LIST_HEAD(list1); + LIST_HEAD(list2); + int i = 0; + + list_add_tail(&entries[0], &list1); + list_add_tail(&entries[1], &list1); + list_add_tail(&entries[2], &list1); + + /* before: [list1] -> entries[0] -> entries[1] -> entries[2] */ + list_cut(&list2, &list1, &entries[1]); + /* after: [list1] -> entries[0], [list2] -> entries[1] -> entries[2] */ + + list_for_each(cur, &list1) { + KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); + i++; + } + + KUNIT_EXPECT_EQ(test, i, 1); + + list_for_each(cur, &list2) { + KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]); + i++; + } +} + static void list_test_list_cut_position(struct kunit *test) { struct list_head entries[3], *cur; @@ -780,6 +808,7 @@ static struct kunit_case list_test_cases[] = { KUNIT_CASE(list_test_list_is_singular), KUNIT_CASE(list_test_list_cut_position), KUNIT_CASE(list_test_list_cut_before), + KUNIT_CASE(list_test_list_cut), KUNIT_CASE(list_test_list_splice), KUNIT_CASE(list_test_list_splice_tail), KUNIT_CASE(list_test_list_splice_init),
From: Keith Busch kbusch@kernel.org
This function wants to move a subset of a list from an element to the end to another list, so do that with the new list_cut helper instead of using the wrong list splice.
Fixes: be647e2c76b27f4 ("nvme: use srcu for iterating namespace list") Reported-by: Venkat Rao Bagalkote venkat88@linux.vnet.ibm.com Tested-by: Venkat Rao Bagalkote venkat88@linux.vnet.ibm.com Signed-off-by: Keith Busch kbusch@kernel.org --- drivers/nvme/host/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f5d150c62955d..30fec15872b3d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3959,9 +3959,10 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
mutex_lock(&ctrl->namespaces_lock); list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { - if (ns->head->ns_id > nsid) - list_splice_init_rcu(&ns->list, &rm_list, - synchronize_rcu); + if (ns->head->ns_id > nsid) { + list_cut(&rm_list, &ctrl->namespaces, &ns->list); + break; + } } mutex_unlock(&ctrl->namespaces_lock); synchronize_srcu(&ctrl->srcu);
On 6/12/24 08:51, Keith Busch wrote:
From: Keith Buschkbusch@kernel.org
This function wants to move a subset of a list from an element to the end to another list, so do that with the new list_cut helper instead of using the wrong list splice.
Fixes: be647e2c76b27f4 ("nvme: use srcu for iterating namespace list") Reported-by: Venkat Rao Bagalkotevenkat88@linux.vnet.ibm.com Tested-by: Venkat Rao Bagalkotevenkat88@linux.vnet.ibm.com Signed-off-by: Keith Buschkbusch@kernel.org
not a blocker, but it'd be really useful if we can get a blktests for this, I've asked OP to provide steps at least ...
Looks good.
Reviewed-by: Chaitanya Kulkarni kch@nvidia.com
-ck
On Wed, Jun 12, 2024 at 05:20:48PM +0000, Chaitanya Kulkarni wrote:
On 6/12/24 08:51, Keith Busch wrote:
From: Keith Buschkbusch@kernel.org
This function wants to move a subset of a list from an element to the end to another list, so do that with the new list_cut helper instead of using the wrong list splice.
Fixes: be647e2c76b27f4 ("nvme: use srcu for iterating namespace list") Reported-by: Venkat Rao Bagalkotevenkat88@linux.vnet.ibm.com Tested-by: Venkat Rao Bagalkotevenkat88@linux.vnet.ibm.com Signed-off-by: Keith Buschkbusch@kernel.org
not a blocker, but it'd be really useful if we can get a blktests for this, I've asked OP to provide steps at least ...
Recreate the original report: detach the highest namespace id from your controller. Let's say it's nsid 10, and let's assume your ctrlid is 0:
# nvme detach-ns /dev/nvme0 -c 0 -n 10
blktests supposedly has something that detaches namespaces through the nvmet module, but it doesn't seem to be catching anything.
On 6/12/24 08:51, Keith Busch wrote:
From: Keith Busch kbusch@kernel.org
Provide a helper to remove elements from a list to the end, and place those elements in a new list.
Signed-off-by: Keith Busch kbusch@kernel.org
Looks good.
Reviewed-by: Chaitanya Kulkarni kch@nvidia.com
-ck
I did quick run before the review if anybody cares :-
head entry list
node [3 | | 2] [1 | | 3] [2 | | 1] [X | | X] addr 1 2 3 4
+ list->next = entry; + list->prev = head->prev; node [3 | | 2] addr 4
+ head->prev = entry->prev; node [1 | | 2] addr 1 + entry->prev->next = head; node [1 | | 1] addr 1
+ entry->prev = list; node [4 | | 3] addr 2
+ list->prev->next = list; node [2 | | 4] addr 3
reordering with list at front with head :- head node [1 | | 1] addr list node [3 | | 2] [4 | | 3] [2 | | 4] addr 4 2 3
On 6/12/24 21:21, Keith Busch wrote:
From: Keith Busch kbusch@kernel.org
Provide a helper to remove elements from a list to the end, and place those elements in a new list.
Signed-off-by: Keith Busch kbusch@kernel.org
include/linux/list.h | 20 ++++++++++++++++++++ lib/list-test.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+)
diff --git a/include/linux/list.h b/include/linux/list.h index 5f4b0a39cf46a..f22850e854820 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -520,6 +520,26 @@ static inline void list_cut_before(struct list_head *list, entry->prev = head; } +/**
- list_cut - cut a list into two from the entry
- @list: a new list to add all removed entries
- @head: a list with entries
- @entry: an entry within head, could be the head itself
- This helper removes elements from @head starting at @entry until the end,
- and appends them to @lists.
- */
+static inline void list_cut(struct list_head *list,
struct list_head *head, struct list_head *entry)
+{
- list->next = entry;
- list->prev = head->prev;
- head->prev = entry->prev;
- entry->prev->next = head;
- entry->prev = list;
- list->prev->next = list;
+}
I am wondering whether we really need the _rcu version of list_cut here? I think that @head could point to an _rcu protected list and that's true for this patch. So there might be concurrent readers accessing @head using _rcu list-traversal primitives, such as list_for_each_entry_rcu().
An _rcu version of list_cut():
static inline void list_cut_rcu(struct list_head *list, struct list_head *head, struct list_head *entry) { list->next = entry; list->prev = head->prev; head->prev = entry->prev; rcu_assign_pointer(list_next_rcu(entry->prev), head); entry->prev = list; list->prev->next = list; }
Thanks, --Nilay
On Thu, Jun 13, 2024 at 10:26:11AM +0530, Nilay Shroff wrote:
I am wondering whether we really need the _rcu version of list_cut here? I think that @head could point to an _rcu protected list and that's true for this patch. So there might be concurrent readers accessing @head using _rcu list-traversal primitives, such as list_for_each_entry_rcu().
Yes, I can't see how this works for a RCU lists without very careful memory ordering.
Btw, another thing - the old vs new list ordering is reversed vs list_splice*, which is a bit confusing (as are the parameter names both for list_splice* and this new helper). Can you switch them around to match?
On Thu, Jun 13, 2024 at 10:10:16AM +0200, Christoph Hellwig wrote:
On Thu, Jun 13, 2024 at 10:26:11AM +0530, Nilay Shroff wrote:
I am wondering whether we really need the _rcu version of list_cut here? I think that @head could point to an _rcu protected list and that's true for this patch. So there might be concurrent readers accessing @head using _rcu list-traversal primitives, such as list_for_each_entry_rcu().
Yes, I can't see how this works for a RCU lists without very careful memory ordering.
Btw, another thing - the old vs new list ordering is reversed vs list_splice*, which is a bit confusing (as are the parameter names both for list_splice* and this new helper). Can you switch them around to match?
The parameters follow the existing conventions from list_cut_back and list_cut_position. Those functions cut off from the head to the "entry", and this one cuts off the "entry" to the tail instead.
On Thu, Jun 13, 2024 at 10:26:11AM +0530, Nilay Shroff wrote:
On 6/12/24 21:21, Keith Busch wrote:
+static inline void list_cut(struct list_head *list,
struct list_head *head, struct list_head *entry)
+{
- list->next = entry;
- list->prev = head->prev;
- head->prev = entry->prev;
- entry->prev->next = head;
- entry->prev = list;
- list->prev->next = list;
+}
I am wondering whether we really need the _rcu version of list_cut here? I think that @head could point to an _rcu protected list and that's true for this patch. So there might be concurrent readers accessing @head using _rcu list-traversal primitives, such as list_for_each_entry_rcu().
An _rcu version of list_cut():
static inline void list_cut_rcu(struct list_head *list, struct list_head *head, struct list_head *entry) { list->next = entry; list->prev = head->prev; head->prev = entry->prev; rcu_assign_pointer(list_next_rcu(entry->prev), head); entry->prev = list; list->prev->next = list; }
I was initially thinking similiar, but this is really just doing a "list_del", and the rcu version calls the same generic __list_del() helper. To make this more clear, we could change
head->prev = entry->prev; entry->prev->next = head;
To just this:
__list_del(entry->prev, head);
And that also gets the "WRITE_ONCE" usage right.
But that's not the problem for the rcu case. It's the last line that's the problem:
list->prev->next = list;
We can't change forward pointers for any element being detached from @head because a reader iterating the list may see that new pointer value and end up in the wrong list, breaking iteration. A synchronize rcu needs to happen before forward pointers can be mucked with, so it still needs to be done in two steps. Oh bother...
On 6/13/24 18:26, Keith Busch wrote:
On Thu, Jun 13, 2024 at 10:26:11AM +0530, Nilay Shroff wrote:
On 6/12/24 21:21, Keith Busch wrote:
+static inline void list_cut(struct list_head *list,
struct list_head *head, struct list_head *entry)
+{
- list->next = entry;
- list->prev = head->prev;
- head->prev = entry->prev;
- entry->prev->next = head;
- entry->prev = list;
- list->prev->next = list;
+}
I am wondering whether we really need the _rcu version of list_cut here? I think that @head could point to an _rcu protected list and that's true for this patch. So there might be concurrent readers accessing @head using _rcu list-traversal primitives, such as list_for_each_entry_rcu().
An _rcu version of list_cut():
static inline void list_cut_rcu(struct list_head *list, struct list_head *head, struct list_head *entry) { list->next = entry; list->prev = head->prev; head->prev = entry->prev; rcu_assign_pointer(list_next_rcu(entry->prev), head); entry->prev = list; list->prev->next = list; }
I was initially thinking similiar, but this is really just doing a "list_del", and the rcu version calls the same generic __list_del() helper. To make this more clear, we could change
head->prev = entry->prev; entry->prev->next = head;
To just this:
__list_del(entry->prev, head);
And that also gets the "WRITE_ONCE" usage right.
Yeah this sounds reasonable.
But that's not the problem for the rcu case. It's the last line that's the problem:
list->prev->next = list;
We can't change forward pointers for any element being detached from @head because a reader iterating the list may see that new pointer value and end up in the wrong list, breaking iteration. A synchronize rcu needs to happen before forward pointers can be mucked with, so it still needs to be done in two steps. Oh bother...
Agree and probably we may break it down using this API: static inline void list_cut_rcu(struct list_head *list, struct list_head *head, struct list_head *entry, void (*sync)(void)) { list->next = entry; list->prev = head->prev; __list_del(entry->prev, head); sync(); entry->prev = list; list->prev->next = list; }
Thanks, --Nilay
On Thu, Jun 13, 2024 at 07:11:52PM +0530, Nilay Shroff wrote:
On 6/13/24 18:26, Keith Busch wrote:
But that's not the problem for the rcu case. It's the last line that's the problem:
list->prev->next = list;
We can't change forward pointers for any element being detached from @head because a reader iterating the list may see that new pointer value and end up in the wrong list, breaking iteration. A synchronize rcu needs to happen before forward pointers can be mucked with, so it still needs to be done in two steps. Oh bother...
Agree and probably we may break it down using this API: static inline void list_cut_rcu(struct list_head *list, struct list_head *head, struct list_head *entry, void (*sync)(void)) { list->next = entry; list->prev = head->prev; __list_del(entry->prev, head); sync(); entry->prev = list; list->prev->next = list; }
Yes, that's the pattern, but I think we need an _srcu() variant: the "sync" callback needs to know the srcu_struct.
On Thu, Jun 13, 2024 at 08:36:44AM -0600, Keith Busch wrote:
On Thu, Jun 13, 2024 at 07:11:52PM +0530, Nilay Shroff wrote:
On 6/13/24 18:26, Keith Busch wrote:
But that's not the problem for the rcu case. It's the last line that's the problem:
list->prev->next = list;
We can't change forward pointers for any element being detached from @head because a reader iterating the list may see that new pointer value and end up in the wrong list, breaking iteration. A synchronize rcu needs to happen before forward pointers can be mucked with, so it still needs to be done in two steps. Oh bother...
Agree and probably we may break it down using this API: static inline void list_cut_rcu(struct list_head *list, struct list_head *head, struct list_head *entry, void (*sync)(void)) { list->next = entry; list->prev = head->prev; __list_del(entry->prev, head); sync(); entry->prev = list; list->prev->next = list; }
Yes, that's the pattern, but I think we need an _srcu() variant: the "sync" callback needs to know the srcu_struct.
Just make a helper function like this:
static void my_synchronize_srcu(void) { synchronize_srcu(&my_srcu_struct); }
Or am I missing something subtle here?
Thanx, Paul
On Thu, Jun 13, 2024 at 07:43:35AM -0700, Paul E. McKenney wrote:
On Thu, Jun 13, 2024 at 08:36:44AM -0600, Keith Busch wrote:
On Thu, Jun 13, 2024 at 07:11:52PM +0530, Nilay Shroff wrote:
On 6/13/24 18:26, Keith Busch wrote:
But that's not the problem for the rcu case. It's the last line that's the problem:
list->prev->next = list;
We can't change forward pointers for any element being detached from @head because a reader iterating the list may see that new pointer value and end up in the wrong list, breaking iteration. A synchronize rcu needs to happen before forward pointers can be mucked with, so it still needs to be done in two steps. Oh bother...
Agree and probably we may break it down using this API: static inline void list_cut_rcu(struct list_head *list, struct list_head *head, struct list_head *entry, void (*sync)(void)) { list->next = entry; list->prev = head->prev; __list_del(entry->prev, head); sync(); entry->prev = list; list->prev->next = list; }
Yes, that's the pattern, but I think we need an _srcu() variant: the "sync" callback needs to know the srcu_struct.
Just make a helper function like this:
static void my_synchronize_srcu(void) { synchronize_srcu(&my_srcu_struct); }
Or am I missing something subtle here?
That would work if we had a global srcu, but the intended usage dynamically allocates one per device the driver is attached to, so a void callback doesn't know which one to sync.
On Thu, Jun 13, 2024 at 08:47:26AM -0600, Keith Busch wrote:
On Thu, Jun 13, 2024 at 07:43:35AM -0700, Paul E. McKenney wrote:
On Thu, Jun 13, 2024 at 08:36:44AM -0600, Keith Busch wrote:
On Thu, Jun 13, 2024 at 07:11:52PM +0530, Nilay Shroff wrote:
On 6/13/24 18:26, Keith Busch wrote:
But that's not the problem for the rcu case. It's the last line that's the problem:
list->prev->next = list;
We can't change forward pointers for any element being detached from @head because a reader iterating the list may see that new pointer value and end up in the wrong list, breaking iteration. A synchronize rcu needs to happen before forward pointers can be mucked with, so it still needs to be done in two steps. Oh bother...
Agree and probably we may break it down using this API: static inline void list_cut_rcu(struct list_head *list, struct list_head *head, struct list_head *entry, void (*sync)(void)) { list->next = entry; list->prev = head->prev; __list_del(entry->prev, head); sync(); entry->prev = list; list->prev->next = list; }
Yes, that's the pattern, but I think we need an _srcu() variant: the "sync" callback needs to know the srcu_struct.
Just make a helper function like this:
static void my_synchronize_srcu(void) { synchronize_srcu(&my_srcu_struct); }
Or am I missing something subtle here?
That would work if we had a global srcu, but the intended usage dynamically allocates one per device the driver is attached to, so a void callback doesn't know which one to sync.
Ah, good point! I suppose that a further suggestion to just JIT the needed function would not be well-received? ;-)
I cannot resist suggesting placing a pointer to the srcu_struct in the task structure. /me runs...
Perhaps somewhat more constructively, my usual question: Is it really necessary to have per-driver SRCU here? What would break if there was a global srcu_struct that applied to all drivers?
Thanx, Paul
On Thu, Jun 13, 2024 at 08:15:06AM -0700, Paul E. McKenney wrote:
On Thu, Jun 13, 2024 at 08:47:26AM -0600, Keith Busch wrote:
Just make a helper function like this:
static void my_synchronize_srcu(void) { synchronize_srcu(&my_srcu_struct); }
Or am I missing something subtle here?
That would work if we had a global srcu, but the intended usage dynamically allocates one per device the driver is attached to, so a void callback doesn't know which one to sync.
Ah, good point! I suppose that a further suggestion to just JIT the needed function would not be well-received? ;-)
I cannot resist suggesting placing a pointer to the srcu_struct in the task structure. /me runs...
Perhaps somewhat more constructively, my usual question: Is it really necessary to have per-driver SRCU here? What would break if there was a global srcu_struct that applied to all drivers?
There's not a strict need for srcu_struct to be per device that I know of. It was just done this way to keep usage localized to the parts that need to be protected. The fear being that one device's long running reader could prevent another device from quickly tearing down.
On Thu, Jun 13, 2024 at 09:40:36AM -0600, Keith Busch wrote:
On Thu, Jun 13, 2024 at 08:15:06AM -0700, Paul E. McKenney wrote:
On Thu, Jun 13, 2024 at 08:47:26AM -0600, Keith Busch wrote:
Just make a helper function like this:
static void my_synchronize_srcu(void) { synchronize_srcu(&my_srcu_struct); }
Or am I missing something subtle here?
That would work if we had a global srcu, but the intended usage dynamically allocates one per device the driver is attached to, so a void callback doesn't know which one to sync.
Ah, good point! I suppose that a further suggestion to just JIT the needed function would not be well-received? ;-)
I cannot resist suggesting placing a pointer to the srcu_struct in the task structure. /me runs...
Perhaps somewhat more constructively, my usual question: Is it really necessary to have per-driver SRCU here? What would break if there was a global srcu_struct that applied to all drivers?
There's not a strict need for srcu_struct to be per device that I know of. It was just done this way to keep usage localized to the parts that need to be protected. The fear being that one device's long running reader could prevent another device from quickly tearing down.
That is a legitimate concern.
Is there a way to avoid this issue by making this be a statement parameter to a macro?
Thanx, Paul
On Thu, Jun 13, 2024 at 09:01:47AM -0700, Paul E. McKenney wrote:
Is there a way to avoid this issue by making this be a statement parameter to a macro?
Something like this? It appears to work for the intended use, at least.
--- diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 3dc1e58865f77..cdd2e5c0d5cdb 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -204,6 +204,30 @@ static inline void list_replace_rcu(struct list_head *old, old->prev = LIST_POISON2; }
+ +static inline void __list_cut_start(struct list_head *list, + struct list_head *head, + struct list_head *entry) +{ + list->next = entry; + list->prev = head->prev; + __list_del(entry->prev, head); +} + +static inline void __list_cut_end(struct list_head *list, + struct list_head *entry) +{ + entry->prev = list; + list->prev->next = list; +} + +#define list_cut_rcu(list, head, entry, sync) \ + do { \ + __list_cut_start(list, head, entry); \ + sync; \ + __list_cut_end(list, entry); \ + } while (0) + /** * __list_splice_init_rcu - join an RCU-protected list into an existing list. * @list: the RCU-protected list to splice --
On Thu, Jun 13, 2024 at 10:10:38AM -0600, Keith Busch wrote:
On Thu, Jun 13, 2024 at 09:01:47AM -0700, Paul E. McKenney wrote:
Is there a way to avoid this issue by making this be a statement parameter to a macro?
Something like this? It appears to work for the intended use, at least.
diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 3dc1e58865f77..cdd2e5c0d5cdb 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -204,6 +204,30 @@ static inline void list_replace_rcu(struct list_head *old, old->prev = LIST_POISON2; }
+static inline void __list_cut_start(struct list_head *list,
struct list_head *head,
struct list_head *entry)
+{
- list->next = entry;
- list->prev = head->prev;
- __list_del(entry->prev, head);
+}
+static inline void __list_cut_end(struct list_head *list,
struct list_head *entry)
+{
- entry->prev = list;
- list->prev->next = list;
+}
+#define list_cut_rcu(list, head, entry, sync) \
- do { \
__list_cut_start(list, head, entry); \
At this point, old readers might see the new list starting from "head" and new readers see the new (shorter) list, again, starting from "head". Presumably no readers can yet see "list".
sync; \
There are now no old readers, and thus no readers that can see any elements in the list starting from "entry".
__list_cut_end(list, entry); \
And this fixes up the list now headed by "list".
So:
Reviewed-by: Paul E. McKenney paulmck@kernel.org
And another argument for lambdas, not that there is a shortage of arguments against them. ;-)
- } while (0)
/**
- __list_splice_init_rcu - join an RCU-protected list into an existing list.
- @list: the RCU-protected list to splice
--
linux-kselftest-mirror@lists.linaro.org