From: Dean Luick <dean.luick(a)cornelisnetworks.com>
[ Upstream commit 892ede5a77f337831609fb9c248ac60948061894 ]
Fix possible RMT overflow: Use the correct netdev size.
Don't allow adjusted user contexts to go negative.
Fix QOS calculation: Send kernel context count as an argument since
dd->n_krcv_queues is not yet set up in earliest call. Do not include
the control context in the QOS calculation. Use the same sized
variable to find the max of krcvq[] entries.
Update the RMT count explanation to make more sense.
Signed-off-by: Dean Luick <dean.luick(a)cornelisnetworks.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro(a)cornelisnetworks.com>
Link: https://lore.kernel.org/r/167329106946.1472990.18385495251650939054.stgit@a…
Signed-off-by: Leon Romanovsky <leon(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/hfi1/chip.c | 59 +++++++++++++++++--------------
1 file changed, 32 insertions(+), 27 deletions(-)
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index ebe970f76232d..90b672feed83d 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1056,7 +1056,7 @@ static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
static void dc_shutdown(struct hfi1_devdata *dd);
static void dc_start(struct hfi1_devdata *dd);
-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
+static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
@@ -13362,7 +13362,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
int ret;
unsigned ngroups;
int rmt_count;
- int user_rmt_reduced;
u32 n_usr_ctxts;
u32 send_contexts = chip_send_contexts(dd);
u32 rcv_contexts = chip_rcv_contexts(dd);
@@ -13421,28 +13420,34 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
(num_kernel_contexts + n_usr_ctxts),
&node_affinity.real_cpu_mask);
/*
- * The RMT entries are currently allocated as shown below:
- * 1. QOS (0 to 128 entries);
- * 2. FECN (num_kernel_context - 1 + num_user_contexts +
- * num_netdev_contexts);
- * 3. netdev (num_netdev_contexts).
- * It should be noted that FECN oversubscribe num_netdev_contexts
- * entries of RMT because both netdev and PSM could allocate any receive
- * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts,
- * and PSM FECN must reserve an RMT entry for each possible PSM receive
- * context.
+ * RMT entries are allocated as follows:
+ * 1. QOS (0 to 128 entries)
+ * 2. FECN (num_kernel_context - 1 [a] + num_user_contexts +
+ * num_netdev_contexts [b])
+ * 3. netdev (NUM_NETDEV_MAP_ENTRIES)
+ *
+ * Notes:
+ * [a] Kernel contexts (except control) are included in FECN if kernel
+ * TID_RDMA is active.
+ * [b] Netdev and user contexts are randomly allocated from the same
+ * context pool, so FECN must cover all contexts in the pool.
*/
- rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_netdev_contexts * 2);
- if (HFI1_CAP_IS_KSET(TID_RDMA))
- rmt_count += num_kernel_contexts - 1;
- if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
- user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
- dd_dev_err(dd,
- "RMT size is reducing the number of user receive contexts from %u to %d\n",
- n_usr_ctxts,
- user_rmt_reduced);
- /* recalculate */
- n_usr_ctxts = user_rmt_reduced;
+ rmt_count = qos_rmt_entries(num_kernel_contexts - 1, NULL, NULL)
+ + (HFI1_CAP_IS_KSET(TID_RDMA) ? num_kernel_contexts - 1
+ : 0)
+ + n_usr_ctxts
+ + num_netdev_contexts
+ + NUM_NETDEV_MAP_ENTRIES;
+ if (rmt_count > NUM_MAP_ENTRIES) {
+ int over = rmt_count - NUM_MAP_ENTRIES;
+ /* try to squish user contexts, minimum of 1 */
+ if (over >= n_usr_ctxts) {
+ dd_dev_err(dd, "RMT overflow: reduce the requested number of contexts\n");
+ return -EINVAL;
+ }
+ dd_dev_err(dd, "RMT overflow: reducing # user contexts from %u to %u\n",
+ n_usr_ctxts, n_usr_ctxts - over);
+ n_usr_ctxts -= over;
}
/* the first N are kernel contexts, the rest are user/netdev contexts */
@@ -14299,15 +14304,15 @@ static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
}
/* return the number of RSM map table entries that will be used for QOS */
-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
+static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
unsigned int *np)
{
int i;
unsigned int m, n;
- u8 max_by_vl = 0;
+ uint max_by_vl = 0;
/* is QOS active at all? */
- if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
+ if (n_krcv_queues < MIN_KERNEL_KCTXTS ||
num_vls == 1 ||
krcvqsset <= 1)
goto no_qos;
@@ -14365,7 +14370,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
if (!rmt)
goto bail;
- rmt_entries = qos_rmt_entries(dd, &m, &n);
+ rmt_entries = qos_rmt_entries(dd->n_krcv_queues - 1, &m, &n);
if (rmt_entries == 0)
goto bail;
qpns_per_vl = 1 << m;
--
2.39.2
[Public]
Hi,
The commit "Remove OTG DIV register write for Virtual" was CC to stable but failed to apply 6.2.y and 6.1.y due to some small missing dependencies.
Here is the series of commits for 6.2.y needed:
3b214bb7185d ("drm/amd/display: fix k1 k2 divider programming for phantom streams")
709671ffb15d ("drm/amd/display: Remove OTG DIV register write for Virtual signals.")
Here is the series of commits for 6.1.y needed:
368307cef69c ("drm/amd/display: Include virtual signal to set k1 and k2 values")
3b214bb7185d ("drm/amd/display: fix k1 k2 divider programming for phantom streams")
709671ffb15d ("drm/amd/display: Remove OTG DIV register write for Virtual signals.")
Can you please apply for a future stable release?
Thanks!
From: NeilBrown <neilb(a)suse.de>
[ Upstream commit 3bc57292278a0b6ac4656cad94c14f2453344b57 ]
slot_store() uses kstrtouint() to get a slot number, but stores the
result in an "int" variable (by casting a pointer).
This can result in a negative slot number if the unsigned int value is
very large.
A negative number means that the slot is empty, but setting a negative
slot number this way will not remove the device from the array. I don't
think this is a serious problem, but it could cause confusion and it is
best to fix it.
Reported-by: Dan Carpenter <error27(a)gmail.com>
Signed-off-by: NeilBrown <neilb(a)suse.de>
Signed-off-by: Song Liu <song(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/md/md.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 880a0ebca8660..69d1501d9160e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2967,6 +2967,9 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
err = kstrtouint(buf, 10, (unsigned int *)&slot);
if (err < 0)
return err;
+ if (slot < 0)
+ /* overflow */
+ return -ENOSPC;
}
if (rdev->mddev->pers && slot == -1) {
/* Setting 'slot' on an active array requires also
--
2.39.2
From: NeilBrown <neilb(a)suse.de>
[ Upstream commit 3bc57292278a0b6ac4656cad94c14f2453344b57 ]
slot_store() uses kstrtouint() to get a slot number, but stores the
result in an "int" variable (by casting a pointer).
This can result in a negative slot number if the unsigned int value is
very large.
A negative number means that the slot is empty, but setting a negative
slot number this way will not remove the device from the array. I don't
think this is a serious problem, but it could cause confusion and it is
best to fix it.
Reported-by: Dan Carpenter <error27(a)gmail.com>
Signed-off-by: NeilBrown <neilb(a)suse.de>
Signed-off-by: Song Liu <song(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/md/md.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index aa2993d5d5d38..64558991ce0a0 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3082,6 +3082,9 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
err = kstrtouint(buf, 10, (unsigned int *)&slot);
if (err < 0)
return err;
+ if (slot < 0)
+ /* overflow */
+ return -ENOSPC;
}
if (rdev->mddev->pers && slot == -1) {
/* Setting 'slot' on an active array requires also
--
2.39.2