From: Danil Kipnis <danil.kipnis@cloud.ionos.com>
[ Upstream commit 09e0dbbeed82e35ce2cd21e086a6fac934163e2a ]
To prevent all clients from starting to reconnect at the same time, schedule the reconnect dwork with a random jitter of +[0,8] seconds.
Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Link: https://lore.kernel.org/r/20200724111508.15734-2-haris.iqbal@cloud.ionos.com Signed-off-by: Danil Kipnis <danil.kipnis@cloud.ionos.com> Signed-off-by: Md Haris Iqbal <haris.iqbal@cloud.ionos.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Sasha Levin <sashal@kernel.org> --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 564388a85603f..5b31d3b03737c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -12,6 +12,7 @@
#include <linux/module.h> #include <linux/rculist.h> +#include <linux/random.h>
#include "rtrs-clt.h" #include "rtrs-log.h" @@ -23,6 +24,12 @@ * leads to "false positives" failed reconnect attempts */ #define RTRS_RECONNECT_BACKOFF 1000 +/* + * Wait for additional random time between 0 and 8 seconds + * before starting to reconnect to avoid clients reconnecting + * all at once in case of a major network outage + */ +#define RTRS_RECONNECT_SEED 8
MODULE_DESCRIPTION("RDMA Transport Client"); MODULE_LICENSE("GPL"); @@ -306,7 +313,8 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) */ delay_ms = clt->reconnect_delay_sec * 1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms)); + msecs_to_jiffies(delay_ms + + prandom_u32() % RTRS_RECONNECT_SEED)); } else { /* * Error can happen just on establishing new connection, @@ -2503,7 +2511,9 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) sess->stats->reconnects.fail_cnt++; delay_ms = clt->reconnect_delay_sec * 1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms)); + msecs_to_jiffies(delay_ms + + prandom_u32() % + RTRS_RECONNECT_SEED)); } }