Hi,
Can you pick these patches into 4.14.y?
They fix some "perf test" errors seen when running in a VM.
commit 10836d9f9ac63d40ccfa756f871ce4ed51ae3b52
Author: Jiri Olsa <jolsa(a)kernel.org>
Date: Mon Jul 3 16:50:30 2017 +0200
perf tests attr: Fix task term values
commit f6a9820d572bd8384d982357cbad214b3a6c04bb
Author: Jiri Olsa <jolsa(a)redhat.com>
Date: Thu Sep 28 18:06:33 2017 +0200
perf tests attr: Fix group stat tests
commit 692f5a22cd284bb8233a38e3ed86881d2d9c89d4
Author: Jiri Olsa <jolsa(a)kernel.org>
Date: Mon Oct 9 15:07:12 2017 +0200
perf tests attr: Make hw events optional
-Tommi
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e248aa7be86e8179f20ac0931774ecd746f3f5bf Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Fri, 25 Jan 2019 16:54:54 -0500
Subject: [PATCH] svcrdma: Remove max_sge check at connect time
Two and a half years ago, the client was changed to use gathered
Send for larger inline messages, in commit 655fec6987b ("xprtrdma:
Use gathered Send for large inline messages"). Several fixes were
required because there are a few in-kernel device drivers whose
max_sge is 3, and these were broken by the change.
Apparently my memory is going, because some time later, I submitted
commit 25fd86eca11c ("svcrdma: Don't overrun the SGE array in
svc_rdma_send_ctxt"), and after that, commit f3c1fd0ee294 ("svcrdma:
Reduce max_send_sges"). These too incorrectly assumed in-kernel
device drivers would have more than a few Send SGEs available.
The fix for the server side is not the same. This is because the
fundamental problem on the server is that, whether or not the client
has provisioned a chunk for the RPC reply, the server must squeeze
even the most complex RPC replies into a single RDMA Send. Failing
in the send path because of Send SGE exhaustion should never be an
option.
Therefore, instead of failing when the send path runs out of SGEs,
switch to using a bounce buffer mechanism to handle RPC replies that
are too complex for the device to send directly. That allows us to
remove the max_sge check to enable drivers with small max_sge to
work again.
Reported-by: Don Dutile <ddutile(a)redhat.com>
Fixes: 25fd86eca11c ("svcrdma: Don't overrun the SGE array in ...")
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
Signed-off-by: J. Bruce Fields <bfields(a)redhat.com>
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index cf51b8f9b15f..1f200119268c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -537,6 +537,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
DMA_TO_DEVICE);
}
+/* If the xdr_buf has more elements than the device can
+ * transmit in a single RDMA Send, then the reply will
+ * have to be copied into a bounce buffer.
+ */
+static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
+ struct xdr_buf *xdr,
+ __be32 *wr_lst)
+{
+ int elements;
+
+ /* xdr->head */
+ elements = 1;
+
+ /* xdr->pages */
+ if (!wr_lst) {
+ unsigned int remaining;
+ unsigned long pageoff;
+
+ pageoff = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ ++elements;
+ remaining -= min_t(u32, PAGE_SIZE - pageoff,
+ remaining);
+ pageoff = 0;
+ }
+ }
+
+ /* xdr->tail */
+ if (xdr->tail[0].iov_len)
+ ++elements;
+
+ /* assume 1 SGE is needed for the transport header */
+ return elements >= rdma->sc_max_send_sges;
+}
+
+/* The device is not capable of sending the reply directly.
+ * Assemble the elements of @xdr into the transport header
+ * buffer.
+ */
+static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt,
+ struct xdr_buf *xdr, __be32 *wr_lst)
+{
+ unsigned char *dst, *tailbase;
+ unsigned int taillen;
+
+ dst = ctxt->sc_xprt_buf;
+ dst += ctxt->sc_sges[0].length;
+
+ memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
+ dst += xdr->head[0].iov_len;
+
+ tailbase = xdr->tail[0].iov_base;
+ taillen = xdr->tail[0].iov_len;
+ if (wr_lst) {
+ u32 xdrpad;
+
+ xdrpad = xdr_padsize(xdr->page_len);
+ if (taillen && xdrpad) {
+ tailbase += xdrpad;
+ taillen -= xdrpad;
+ }
+ } else {
+ unsigned int len, remaining;
+ unsigned long pageoff;
+ struct page **ppages;
+
+ ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ pageoff = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ len = min_t(u32, PAGE_SIZE - pageoff, remaining);
+
+ memcpy(dst, page_address(*ppages), len);
+ remaining -= len;
+ dst += len;
+ pageoff = 0;
+ }
+ }
+
+ if (taillen)
+ memcpy(dst, tailbase, taillen);
+
+ ctxt->sc_sges[0].length += xdr->len;
+ ib_dma_sync_single_for_device(rdma->sc_pd->device,
+ ctxt->sc_sges[0].addr,
+ ctxt->sc_sges[0].length,
+ DMA_TO_DEVICE);
+
+ return 0;
+}
+
/* svc_rdma_map_reply_msg - Map the buffer holding RPC message
* @rdma: controlling transport
* @ctxt: send_ctxt for the Send WR
@@ -559,8 +652,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
u32 xdr_pad;
int ret;
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst))
+ return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst);
+
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_buf(rdma, ctxt,
xdr->head[0].iov_base,
xdr->head[0].iov_len);
@@ -591,8 +686,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
while (remaining) {
len = min_t(u32, PAGE_SIZE - page_off, remaining);
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
page_off, len);
if (ret < 0)
@@ -606,8 +700,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
len = xdr->tail[0].iov_len;
tail:
if (len) {
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
if (ret < 0)
return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 924c17d46903..57f86c63a463 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -419,12 +419,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Transport header, head iovec, tail iovec */
newxprt->sc_max_send_sges = 3;
/* Add one SGE per page list entry */
- newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE;
- if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) {
- pr_err("svcrdma: too few Send SGEs available (%d needed)\n",
- newxprt->sc_max_send_sges);
- goto errout;
- }
+ newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
+ if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
+ newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = svcrdma_max_requests;
newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
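For reference, the pull-up heuristic in the patch above can be modeled in a
few lines of C. This is a minimal, self-contained sketch under illustrative
assumptions: struct xdr_buf_model and needs_pull_up() are hypothetical
stand-ins for the kernel's xdr_buf and svc_rdma_pull_up_needed(), not the
real definitions.

/*
 * Simplified model of the pull-up decision: count the SGEs a reply
 * would need and fall back to the bounce buffer when the device's
 * Send SGE limit is reached.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

struct xdr_buf_model {
	size_t page_base;	/* byte offset into the first page */
	size_t page_len;	/* total bytes in the page list */
	size_t tail_len;	/* length of the tail iovec */
};

static bool needs_pull_up(const struct xdr_buf_model *xdr,
			  unsigned int max_send_sges, bool has_write_list)
{
	unsigned int elements = 1;	/* the head always takes one SGE */

	if (!has_write_list) {		/* one SGE per page touched */
		size_t pageoff = xdr->page_base % PAGE_SIZE;
		size_t remaining = xdr->page_len;

		while (remaining) {
			size_t chunk = PAGE_SIZE - pageoff;

			if (chunk > remaining)
				chunk = remaining;
			remaining -= chunk;
			pageoff = 0;
			elements++;
		}
	}

	if (xdr->tail_len)
		elements++;

	/* ">=" because one more SGE is reserved for the transport header */
	return elements >= max_send_sges;
}

int main(void)
{
	/* 16 KiB of page data starting at offset 100 spans five pages;
	 * with head and tail that is 7 elements, too many for a device
	 * whose max_sge is 3. */
	struct xdr_buf_model xdr = {
		.page_base = 100, .page_len = 16384, .tail_len = 4,
	};

	printf("pull up needed: %d\n", needs_pull_up(&xdr, 3, false));
	return 0;
}

The accept-time half of the fix follows the same logic: the transport now
asks for 3 + (svcrdma_max_req_size / PAGE_SIZE) + 1 Send SGEs and, rather
than rejecting the connection, clamps to whatever dev->attrs.max_send_sge
the device offers.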
The patch below does not apply to the 4.20-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d028a646e84b9b131e4ff2cb5bbdd3825d141028 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Date: Tue, 5 Feb 2019 16:18:46 +0200
Subject: [PATCH] drm/i915: Try to sanitize bogus DPLL state left over by
broken SNB BIOSen
Certain SNB machines (eg. ASUS K53SV) seem to have a broken BIOS
which misprograms the hardware badly when encountering a suitably
high resolution display. The programmed pipe timings are somewhat
bonkers and the DPLL is totally misprogrammed (P divider == 0).
That will result in atomic commit timeouts as apparently the pipe
is sufficiently stuck to not signal vblank interrupts.
IIRC something like this was also observed on some other SNB
machine years ago (might have been a Dell XPS 8300) but a BIOS
update cured it. Sadly looks like this was never fixed for the
ASUS K53SV as the latest BIOS (K53SV.320 11/11/2011) is still
broken.
The quickest way to deal with this seems to be to shut down
the pipe+ports+DPLL. Unfortunately doing this during the
normal sanitization phase isn't quite soon enough as we
already spew several WARNs about the bogus hardware state.
But it's better than hanging the boot for a few dozen seconds.
Since this is limited to a few old machines it doesn't seem
entirely worthwhile to try and rework the readout+sanitization
code to handle it more gracefully.
v2: Fix potential NULL deref (kbuild test robot)
Constify has_bogus_dpll_config()
Cc: stable(a)vger.kernel.org # v4.20+
Cc: Daniel Kamil Kozar <dkk089(a)gmail.com>
Reported-by: Daniel Kamil Kozar <dkk089(a)gmail.com>
Tested-by: Daniel Kamil Kozar <dkk089(a)gmail.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109245
Fixes: 516a49cc1946 ("drm/i915: Fix assert_plane() warning on bootup with external display")
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190111174950.10681-1-ville.…
Reviewed-by: Mika Kahola <mika.kahola(a)intel.com>
(cherry picked from commit 7bed8adcd9f86231bb69bbc02f88ad89330f99e3)
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190205141846.6053-1-ville.s…
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3da9c0f9e948..248128126422 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -15415,16 +15415,45 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
}
}
+static bool has_bogus_dpll_config(const struct intel_crtc_state *crtc_state)
+{
+ struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+
+ /*
+ * Some SNB BIOSen (eg. ASUS K53SV) are known to misprogram
+ * the hardware when a high res displays plugged in. DPLL P
+ * divider is zero, and the pipe timings are bonkers. We'll
+ * try to disable everything in that case.
+ *
+ * FIXME would be nice to be able to sanitize this state
+ * without several WARNs, but for now let's take the easy
+ * road.
+ */
+ return IS_GEN6(dev_priv) &&
+ crtc_state->base.active &&
+ crtc_state->shared_dpll &&
+ crtc_state->port_clock == 0;
+}
+
static void intel_sanitize_encoder(struct intel_encoder *encoder)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct intel_connector *connector;
+ struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+ struct intel_crtc_state *crtc_state = crtc ?
+ to_intel_crtc_state(crtc->base.state) : NULL;
/* We need to check both for a crtc link (meaning that the
* encoder is active and trying to read from a pipe) and the
* pipe itself being active. */
- bool has_active_crtc = encoder->base.crtc &&
- to_intel_crtc(encoder->base.crtc)->active;
+ bool has_active_crtc = crtc_state &&
+ crtc_state->base.active;
+
+ if (crtc_state && has_bogus_dpll_config(crtc_state)) {
+ DRM_DEBUG_KMS("BIOS has misprogrammed the hardware. Disabling pipe %c\n",
+ pipe_name(crtc->pipe));
+ has_active_crtc = false;
+ }
connector = intel_encoder_find_connector(encoder);
if (connector && !has_active_crtc) {
@@ -15435,16 +15464,25 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)
/* Connector is active, but has no active pipe. This is
* fallout from our resume register restoring. Disable
* the encoder manually again. */
- if (encoder->base.crtc) {
- struct drm_crtc_state *crtc_state = encoder->base.crtc->state;
+ if (crtc_state) {
+ struct drm_encoder *best_encoder;
DRM_DEBUG_KMS("[ENCODER:%d:%s] manually disabled\n",
encoder->base.base.id,
encoder->base.name);
+
+ /* avoid oopsing in case the hooks consult best_encoder */
+ best_encoder = connector->base.state->best_encoder;
+ connector->base.state->best_encoder = &encoder->base;
+
if (encoder->disable)
- encoder->disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state);
+ encoder->disable(encoder, crtc_state,
+ connector->base.state);
if (encoder->post_disable)
- encoder->post_disable(encoder, to_intel_crtc_state(crtc_state), connector->base.state);
+ encoder->post_disable(encoder, crtc_state,
+ connector->base.state);
+
+ connector->base.state->best_encoder = best_encoder;
}
encoder->base.crtc = NULL;
This patch is only appropriate for stable kernels v4.16 - v4.19
Since commit 9b30889c548a ("SUNRPC: Ensure we always close the socket after
a connection shuts down"), and until commit c544577daddb ("SUNRPC: Clean up
transport write space handling"), it is possible for the NFS client to spin
in the following tight loop:
269.964083: rpc_task_run_action: task:43@0 flags=5a81 state=0005 status=0 action=call_bind [sunrpc]
269.964083: rpc_task_run_action: task:43@0 flags=5a81 state=0005 status=0 action=call_connect [sunrpc]
269.964083: rpc_task_run_action: task:43@0 flags=5a81 state=0005 status=0 action=call_transmit [sunrpc]
269.964085: xprt_transmit: peer=[10.0.1.82]:2049 xid=0x761d3f77 status=-32
269.964085: rpc_task_run_action: task:43@0 flags=5a81 state=0005 status=-32 action=call_transmit_status [sunrpc]
269.964085: rpc_task_run_action: task:43@0 flags=5a81 state=0005 status=-32 action=call_status [sunrpc]
269.964085: rpc_call_status: task:43@0 status=-32
The issue is that the path through call_transmit_status does not release
the XPRT_LOCK when the transmit result is -EPIPE, so the socket cannot be
properly shut down.
The below commit fixed things up in mainline by unconditionally calling
xprt_end_transmit() and releasing the XPRT_LOCK after every pass through
call_transmit. However, the entirety of this commit is not appropriate for
stable kernels because its original inclusion was part of a series that
modifies the sunrpc code to use a different queueing model. As a result,
there are machinations within this patch that are not needed for a stable
fix and will not make sense without a larger backport of the mainline
series.
In this patch, we take the slightly modified bit of the mainline patch
below, which is to release the XPRT_LOCK on transmission error should we
detect that the transport is waiting to close.
commit c544577daddb618c7dd5fa7fb98d6a41782f020e upstream
Author: Trond Myklebust <trond.myklebust(a)hammerspace.com>
Date: Mon Sep 3 23:39:27 2018 -0400
SUNRPC: Clean up transport write space handling
Treat socket write space handling in the same way we now treat transport
congestion: by denying the XPRT_LOCK until the transport signals that it
has free buffer space.
Signed-off-by: Trond Myklebust <trond.myklebust(a)hammerspace.com>
The original discussion of the problem is here:
https://lore.kernel.org/linux-nfs/20181212135157.4489-1-dwysocha@redhat.com…
This passes my usual cthon and xfstests on NFS as applied on v4.19 mainline.
Reported-by: Dave Wysochanski <dwysocha(a)redhat.com>
Suggested-by: Trond Myklebust <trondmy(a)hammerspace.com>
Signed-off-by: Benjamin Coddington <bcodding(a)redhat.com>
---
include/linux/sunrpc/xprt.h | 5 +++++
net/sunrpc/clnt.c | 6 ++++--
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 336fd1a19cca..f30bf500888d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -443,6 +443,11 @@ static inline int xprt_test_and_set_connecting(struct rpc_xprt *xprt)
return test_and_set_bit(XPRT_CONNECTING, &xprt->state);
}
+static inline int xprt_close_wait(struct rpc_xprt *xprt)
+{
+ return test_bit(XPRT_CLOSE_WAIT, &xprt->state);
+}
+
static inline void xprt_set_bound(struct rpc_xprt *xprt)
{
test_and_set_bit(XPRT_BOUND, &xprt->state);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8ea2f5fadd96..1fc812ba9871 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1992,13 +1992,15 @@ call_transmit(struct rpc_task *task)
static void
call_transmit_status(struct rpc_task *task)
{
+ struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
task->tk_action = call_status;
/*
* Common case: success. Force the compiler to put this
- * test first.
+ * test first. Or, if any error and xprt_close_wait,
+ * release the xprt lock so the socket can close.
*/
- if (task->tk_status == 0) {
+ if (task->tk_status == 0 || xprt_close_wait(xprt)) {
xprt_end_transmit(task);
rpc_task_force_reencode(task);
return;
--
2.14.3
Please include the upstream patches
* commit 085def3ade52 ("nfsd4: fix cached replies to solo SEQUENCE compounds")
* commit 53da6a53e1d4 ("nfsd4: catch some false session retries")
into longterm kernel 4.14.
These patches went upstream in 4.15, so they are already included
in the 4.19 longterm and stable trees.
An nfsd4 server failure, where the server sends unrelated replies
to client requests containing only SEQUENCE operations, has been
observed with kernel version 4.14.87 [1]. Because the client
retries endlessly, this completely breaks the NFS mount.
A backport of these patches has been suggested by the patch
author J. Bruce Fields. [1]
Only limited testing has been done (running a single server with
4.14.96 and these two patches for a week while generating some high
load on it), so testing alone cannot prove that the patches fix the
observed problem. However, as the patches have been part of upstream
and stable kernels for over a year, they should be a safe pick.
Backport note: Only line numbers updated.
[1] https://bugzilla.kernel.org/show_bug.cgi?id=202435
J. Bruce Fields (2):
nfsd4: fix cached replies to solo SEQUENCE compounds
nfsd4: catch some false session retries
fs/nfsd/nfs4state.c | 57 ++++++++++++++++++++++++++++++++++++++++-----
fs/nfsd/state.h | 2 ++
fs/nfsd/xdr4.h | 13 +++++++++--
3 files changed, 64 insertions(+), 8 deletions(-)
--
2.20.0
On Wed, Feb 13, 2019 at 02:05:13PM +0000, Vabhav Sharma wrote:
> ++ Kernel Mailing list
>
> From: Vabhav Sharma
> Sent: Wednesday, February 13, 2019 7:30 PM
> To: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>; Stephen Boyd <sboyd(a)kernel.org>; Viresh Kumar <viresh.kumar(a)linaro.org>
> Subject: PATCHES MERGE MISSING FROM PATCHSET
> Importance: High
>
> Hello Maintainers,
> I sent a patch series with the subject "arm64: dts: NXP: add basic dts file for LX2160A SoC" in 2018.
> * All the dts and SoC/Board support patches from that series have been merged, but the two patches below have not:
> * clk: qoriq: increase array size of cmux_to_group <https://patchwork.kernel.org/patch/10658885/>
> * clk: qoriq: Add clockgen support for lx2160a <https://patchwork.kernel.org/patch/10658871/>
> Kindly help merge these patches.
If patches somehow get dropped, just resend them and be sure to cc: the
proper developers/maintainers/lists as shown by
scripts/get_maintainer.pl.
thanks,
greg k-h
The patch below does not apply to the 4.20-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 479d59026fe44f89fa67efa01a4d47e00808e688 Mon Sep 17 00:00:00 2001
From: Deepak Rawat <drawat(a)vmware.com>
Date: Fri, 21 Dec 2018 14:38:35 -0800
Subject: [PATCH] drm/vmwgfx: Also check for crtc status while checking for DU
active
During modeset check it is possible to have all crtc_state's in atomic
state. Check for crtc enable status while checking for display unit
active status. Only error if enabling a crtc while display unit is not
active.
Cc: <stable(a)vger.kernel.org>
Fixes: 9da6e26c0aae: ("drm/vmwgfx: Fix a layout race condition")
Signed-off-by: Deepak Rawat <drawat(a)vmware.com>
Reviewed-by: Thomas Hellstrom <thellstrom(a)vmware.com>
Signed-off-by: Thomas Hellstrom <thellstrom(a)vmware.com>
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 5e257a600cea..ed2f67822f45 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1646,7 +1646,7 @@ static int vmw_kms_check_topology(struct drm_device *dev,
struct drm_connector_state *conn_state;
struct vmw_connector_state *vmw_conn_state;
- if (!du->pref_active) {
+ if (!du->pref_active && new_crtc_state->enable) {
ret = -EINVAL;
goto clean;
}
The patch below does not apply to the 4.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e248aa7be86e8179f20ac0931774ecd746f3f5bf Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Fri, 25 Jan 2019 16:54:54 -0500
Subject: [PATCH] svcrdma: Remove max_sge check at connect time
Two and a half years ago, the client was changed to use gathered
Send for larger inline messages, in commit 655fec6987b ("xprtrdma:
Use gathered Send for large inline messages"). Several fixes were
required because there are a few in-kernel device drivers whose
max_sge is 3, and these were broken by the change.
Apparently my memory is going, because some time later, I submitted
commit 25fd86eca11c ("svcrdma: Don't overrun the SGE array in
svc_rdma_send_ctxt"), and after that, commit f3c1fd0ee294 ("svcrdma:
Reduce max_send_sges"). These too incorrectly assumed in-kernel
device drivers would have more than a few Send SGEs available.
The fix for the server side is not the same. This is because the
fundamental problem on the server is that, whether or not the client
has provisioned a chunk for the RPC reply, the server must squeeze
even the most complex RPC replies into a single RDMA Send. Failing
in the send path because of Send SGE exhaustion should never be an
option.
Therefore, instead of failing when the send path runs out of SGEs,
switch to using a bounce buffer mechanism to handle RPC replies that
are too complex for the device to send directly. That allows us to
remove the max_sge check to enable drivers with small max_sge to
work again.
Reported-by: Don Dutile <ddutile(a)redhat.com>
Fixes: 25fd86eca11c ("svcrdma: Don't overrun the SGE array in ...")
Cc: stable(a)vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
Signed-off-by: J. Bruce Fields <bfields(a)redhat.com>
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index cf51b8f9b15f..1f200119268c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -537,6 +537,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
DMA_TO_DEVICE);
}
+/* If the xdr_buf has more elements than the device can
+ * transmit in a single RDMA Send, then the reply will
+ * have to be copied into a bounce buffer.
+ */
+static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
+ struct xdr_buf *xdr,
+ __be32 *wr_lst)
+{
+ int elements;
+
+ /* xdr->head */
+ elements = 1;
+
+ /* xdr->pages */
+ if (!wr_lst) {
+ unsigned int remaining;
+ unsigned long pageoff;
+
+ pageoff = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ ++elements;
+ remaining -= min_t(u32, PAGE_SIZE - pageoff,
+ remaining);
+ pageoff = 0;
+ }
+ }
+
+ /* xdr->tail */
+ if (xdr->tail[0].iov_len)
+ ++elements;
+
+ /* assume 1 SGE is needed for the transport header */
+ return elements >= rdma->sc_max_send_sges;
+}
+
+/* The device is not capable of sending the reply directly.
+ * Assemble the elements of @xdr into the transport header
+ * buffer.
+ */
+static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt,
+ struct xdr_buf *xdr, __be32 *wr_lst)
+{
+ unsigned char *dst, *tailbase;
+ unsigned int taillen;
+
+ dst = ctxt->sc_xprt_buf;
+ dst += ctxt->sc_sges[0].length;
+
+ memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
+ dst += xdr->head[0].iov_len;
+
+ tailbase = xdr->tail[0].iov_base;
+ taillen = xdr->tail[0].iov_len;
+ if (wr_lst) {
+ u32 xdrpad;
+
+ xdrpad = xdr_padsize(xdr->page_len);
+ if (taillen && xdrpad) {
+ tailbase += xdrpad;
+ taillen -= xdrpad;
+ }
+ } else {
+ unsigned int len, remaining;
+ unsigned long pageoff;
+ struct page **ppages;
+
+ ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ pageoff = xdr->page_base & ~PAGE_MASK;
+ remaining = xdr->page_len;
+ while (remaining) {
+ len = min_t(u32, PAGE_SIZE - pageoff, remaining);
+
+ memcpy(dst, page_address(*ppages), len);
+ remaining -= len;
+ dst += len;
+ pageoff = 0;
+ }
+ }
+
+ if (taillen)
+ memcpy(dst, tailbase, taillen);
+
+ ctxt->sc_sges[0].length += xdr->len;
+ ib_dma_sync_single_for_device(rdma->sc_pd->device,
+ ctxt->sc_sges[0].addr,
+ ctxt->sc_sges[0].length,
+ DMA_TO_DEVICE);
+
+ return 0;
+}
+
/* svc_rdma_map_reply_msg - Map the buffer holding RPC message
* @rdma: controlling transport
* @ctxt: send_ctxt for the Send WR
@@ -559,8 +652,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
u32 xdr_pad;
int ret;
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst))
+ return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst);
+
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_buf(rdma, ctxt,
xdr->head[0].iov_base,
xdr->head[0].iov_len);
@@ -591,8 +686,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
while (remaining) {
len = min_t(u32, PAGE_SIZE - page_off, remaining);
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++,
page_off, len);
if (ret < 0)
@@ -606,8 +700,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
len = xdr->tail[0].iov_len;
tail:
if (len) {
- if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges)
- return -EIO;
+ ++ctxt->sc_cur_sge_no;
ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len);
if (ret < 0)
return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 924c17d46903..57f86c63a463 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -419,12 +419,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Transport header, head iovec, tail iovec */
newxprt->sc_max_send_sges = 3;
/* Add one SGE per page list entry */
- newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE;
- if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) {
- pr_err("svcrdma: too few Send SGEs available (%d needed)\n",
- newxprt->sc_max_send_sges);
- goto errout;
- }
+ newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
+ if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
+ newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = svcrdma_max_requests;
newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;