On 06/11, Bobby Eshleman wrote:
From: Bobby Eshleman bobbyeshleman@meta.com
Add -b <bytes> to request a non-default niov size via NETDEV_A_DMABUF_RX_BUF_SIZE. When the value exceeds PAGE_SIZE, udmabuf_alloc() switches to an MFD_HUGETLB-backed memfd so each 2 MB hugepage produces one naturally-aligned sg entry.
Reject values > 2 MB up front: MFD_HUGETLB + udmabuf can only guarantee 2 MB per sg entry (one hugepage), so a larger rx_buf_size would fail the per-sg length/alignment check.
Add CONFIG_HUGETLBFS=y to drivers/net/hw/config so the new path is reachable in the CI kernels built for these tests.
Signed-off-by: Bobby Eshleman bobbyeshleman@meta.com
tools/testing/selftests/drivers/net/hw/config | 1 + tools/testing/selftests/drivers/net/hw/ncdevmem.c | 49 +++++++++++++++++++++-- 2 files changed, 47 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index cd20024218cd..ed8642b68094 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -3,6 +3,7 @@ CONFIG_FAIL_FUNCTION=y CONFIG_FAULT_INJECTION=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FUNCTION_ERROR_INJECTION=y +CONFIG_HUGETLBFS=y CONFIG_INET6_ESP=y CONFIG_INET6_ESP_OFFLOAD=y CONFIG_INET_ESP=y diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index d96e8a3b5a65..325c128191e2 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -61,6 +61,7 @@ #include <sys/time.h> #include <linux/memfd.h> +#include <sys/param.h> #include <linux/dma-buf.h> #include <linux/errqueue.h> #include <linux/udmabuf.h> @@ -79,6 +80,7 @@ #define PAGE_SHIFT 12 #define TEST_PREFIX "ncdevmem" #define NUM_PAGES 16000 +#define MB(x) ((x) << 20) #ifndef MSG_SOCK_DEVMEM #define MSG_SOCK_DEVMEM 0x2000000 @@ -100,6 +102,7 @@ static unsigned int dmabuf_id; static uint32_t tx_dmabuf_id; static int waittime_ms = 500; static bool fail_on_linear; +static uint32_t rx_buf_size; /* System state loaded by current_config_load() */ #define MAX_FLOWS 8 @@ -142,6 +145,7 @@ static struct memory_buffer *udmabuf_alloc(size_t size) { struct udmabuf_create create; struct memory_buffer *ctx;
- unsigned int memfd_flags; int ret;
ctx = malloc(sizeof(*ctx)); @@ -156,9 +160,14 @@ static struct memory_buffer *udmabuf_alloc(size_t size) goto err_free_ctx; }
- ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
- memfd_flags = MFD_ALLOW_SEALING;
[..]
- if (rx_buf_size > (uint32_t)getpagesize())
What's the logic behind explicit (uint32_t) cast? uint vs int comparisons should promote the int to uint automatically?
memfd_flags |= MFD_HUGETLB | MFD_HUGE_2MB;- ctx->memfd = memfd_create("udmabuf-test", memfd_flags); if (ctx->memfd < 0) {
pr_err("[skip,no-memfd]");
pr_err("[skip,no-memfd%s]", goto err_close_dev; }(memfd_flags & MFD_HUGETLB) ? " (need hugepages)" : "");@@ -168,6 +177,11 @@ static struct memory_buffer *udmabuf_alloc(size_t size) goto err_close_memfd; }
- if (memfd_flags & MFD_HUGETLB) {
size = roundup(size, MB(2));ctx->size = size;- }
- ret = ftruncate(ctx->memfd, size); if (ret == -1) { pr_err("[FAIL,memfd-truncate]");
@@ -699,6 +713,8 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, netdev_bind_rx_req_set_ifindex(req, ifindex); netdev_bind_rx_req_set_fd(req, dmabuf_fd); __netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
- if (rx_buf_size)
netdev_bind_rx_req_set_rx_buf_size(req, rx_buf_size);rsp = netdev_bind_rx(*ys, req); if (!rsp) { @@ -1411,7 +1427,7 @@ int main(int argc, char *argv[]) int is_server = 0, opt; int ret, err = 1;
- while ((opt = getopt(argc, argv, "Lls:c:p:v:q:t:f:z:n")) != -1) {
- while ((opt = getopt(argc, argv, "Lls:c:p:v:q:t:f:z:nb:")) != -1) { switch (opt) { case 'L': fail_on_linear = true;
@@ -1446,6 +1462,33 @@ int main(int argc, char *argv[]) case 'n': skip_config = 1; break;
case 'b': {char *endp;unsigned long val;
Christmas tree here as well?
errno = 0;val = strtoul(optarg, &endp, 0);
[..]
if (errno || endp == optarg || *endp || val == 0 ||val > UINT32_MAX) {pr_err("invalid rx_buf_size: %s", optarg);return 1;}
This is too sophisticated :-/ Just (if val == UINT32_MAX && errno == ERANGE) ? (you're looking for an overflow here supposedly?)
[..]
if (val & (val - 1)) {pr_err("rx_buf_size must be a power of 2");return 1;}if (val < (unsigned long)getpagesize()) {pr_err("rx_buf_size must be >= PAGE_SIZE (%d)",getpagesize());return 1;}if (val > MB(2)) {pr_err("rx_buf_size > 2 MB not supported");return 1;}
We already check these on the kernel size, so should be ok to drop?