Hello,
While trying to implement an eBPF gatekeeper program, we ran into an
issue whereas the LSM hooks are missing some relevant data.
Certain subcommands passed to the bpf() syscall can be invoked from
either the kernel or userspace. Additionally, some fields in the
bpf_attr struct contain pointers, and depending on where the
subcommand was invoked, they could point to either user or kernel
memory. One example of this is the bpf_prog_load subcommand and its
fd_array. This data is made available and used by the verifier but not
made available to the LSM subsystem. This patchset simply exposes that
information to applicable LSM hooks.
Change list:
- v6 -> v7
- use gettid/pid in lieu of getpid/tgid in test condition
- v5 -> v6
- fix regression caused by is_kernel renaming
- simplify test logic
- v4 -> v5
- merge v4 selftest breakout patch back into a single patch
- change "is_kernel" to "kernel"
- add selftest using new kernel flag
- v3 -> v4
- split out selftest changes into a separate patch
- v2 -> v3
- reorder params so that the new boolean flag is the last param
- fixup function signatures in bpf selftests
- v1 -> v2
- Pass a boolean flag in lieu of bpfptr_t
Revisions:
- v6
https://lore.kernel.org/bpf/20250308013314.719150-1-bboscaccy@linux.microso…
- v5
https://lore.kernel.org/bpf/20250307213651.3065714-1-bboscaccy@linux.micros…
- v4
https://lore.kernel.org/bpf/20250304203123.3935371-1-bboscaccy@linux.micros…
- v3
https://lore.kernel.org/bpf/20250303222416.3909228-1-bboscaccy@linux.micros…
- v2
https://lore.kernel.org/bpf/20250228165322.3121535-1-bboscaccy@linux.micros…
- v1
https://lore.kernel.org/bpf/20250226003055.1654837-1-bboscaccy@linux.micros…
Blaise Boscaccy (2):
security: Propagate caller information in bpf hooks
selftests/bpf: Add a kernel flag test for LSM bpf hook
include/linux/lsm_hook_defs.h | 6 +--
include/linux/security.h | 12 +++---
kernel/bpf/syscall.c | 10 ++---
security/security.c | 15 ++++---
security/selinux/hooks.c | 6 +--
.../selftests/bpf/prog_tests/kernel_flag.c | 43 +++++++++++++++++++
.../selftests/bpf/progs/rcu_read_lock.c | 3 +-
.../bpf/progs/test_cgroup1_hierarchy.c | 4 +-
.../selftests/bpf/progs/test_kernel_flag.c | 28 ++++++++++++
.../bpf/progs/test_kfunc_dynptr_param.c | 6 +--
.../selftests/bpf/progs/test_lookup_key.c | 2 +-
.../selftests/bpf/progs/test_ptr_untrusted.c | 2 +-
.../bpf/progs/test_task_under_cgroup.c | 2 +-
.../bpf/progs/test_verify_pkcs7_sig.c | 2 +-
14 files changed, 108 insertions(+), 33 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/kernel_flag.c
create mode 100644 tools/testing/selftests/bpf/progs/test_kernel_flag.c
--
2.48.1
A task in the kernel (task_mm_cid_work) runs somewhat periodically to
compact the mm_cid for each process. Add a test to validate that it runs
correctly and timely.
The test spawns 1 thread pinned to each CPU, then each thread, including
the main one, runs in short bursts for some time. During this period, the
mm_cids should be spanning all numbers between 0 and nproc.
At the end of this phase, a thread with high enough mm_cid (>= nproc/2)
is selected to be the new leader, all other threads terminate.
After some time, the only running thread should see 0 as mm_cid, if that
doesn't happen, the compaction mechanism didn't work and the test fails.
The test never fails if only 1 core is available, in which case, we
cannot test anything as the only available mm_cid is 0.
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Signed-off-by: Gabriele Monaco <gmonaco(a)redhat.com>
---
tools/testing/selftests/rseq/.gitignore | 1 +
tools/testing/selftests/rseq/Makefile | 2 +-
.../selftests/rseq/mm_cid_compaction_test.c | 200 ++++++++++++++++++
3 files changed, 202 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/rseq/mm_cid_compaction_test.c
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
index 16496de5f6ce4..2c89f97e4f737 100644
--- a/tools/testing/selftests/rseq/.gitignore
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -3,6 +3,7 @@ basic_percpu_ops_test
basic_percpu_ops_mm_cid_test
basic_test
basic_rseq_op_test
+mm_cid_compaction_test
param_test
param_test_benchmark
param_test_compare_twice
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index 5a3432fceb586..ce1b38f46a355 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -16,7 +16,7 @@ OVERRIDE_TARGETS = 1
TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \
param_test_benchmark param_test_compare_twice param_test_mm_cid \
- param_test_mm_cid_benchmark param_test_mm_cid_compare_twice
+ param_test_mm_cid_benchmark param_test_mm_cid_compare_twice mm_cid_compaction_test
TEST_GEN_PROGS_EXTENDED = librseq.so
diff --git a/tools/testing/selftests/rseq/mm_cid_compaction_test.c b/tools/testing/selftests/rseq/mm_cid_compaction_test.c
new file mode 100644
index 0000000000000..7ddde3b657dd6
--- /dev/null
+++ b/tools/testing/selftests/rseq/mm_cid_compaction_test.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+
+#include "../kselftest.h"
+#include "rseq.h"
+
+#define VERBOSE 0
+#define printf_verbose(fmt, ...) \
+ do { \
+ if (VERBOSE) \
+ printf(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+/* 0.5 s */
+#define RUNNER_PERIOD 500000
+/* Number of runs before we terminate or get the token */
+#define THREAD_RUNS 5
+
+/*
+ * Number of times we check that the mm_cid were compacted.
+ * Checks are repeated every RUNNER_PERIOD.
+ */
+#define MM_CID_COMPACT_TIMEOUT 10
+
+struct thread_args {
+ int cpu;
+ int num_cpus;
+ pthread_mutex_t *token;
+ pthread_barrier_t *barrier;
+ pthread_t *tinfo;
+ struct thread_args *args_head;
+};
+
+static void __noreturn *thread_runner(void *arg)
+{
+ struct thread_args *args = arg;
+ int i, ret, curr_mm_cid;
+ cpu_set_t cpumask;
+
+ CPU_ZERO(&cpumask);
+ CPU_SET(args->cpu, &cpumask);
+ ret = pthread_setaffinity_np(pthread_self(), sizeof(cpumask), &cpumask);
+ if (ret) {
+ errno = ret;
+ perror("Error: failed to set affinity");
+ abort();
+ }
+ pthread_barrier_wait(args->barrier);
+
+ for (i = 0; i < THREAD_RUNS; i++)
+ usleep(RUNNER_PERIOD);
+ curr_mm_cid = rseq_current_mm_cid();
+ /*
+ * We select one thread with high enough mm_cid to be the new leader.
+ * All other threads (including the main thread) will terminate.
+ * After some time, the mm_cid of the only remaining thread should
+ * converge to 0, if not, the test fails.
+ */
+ if (curr_mm_cid >= args->num_cpus / 2 &&
+ !pthread_mutex_trylock(args->token)) {
+ printf_verbose(
+ "cpu%d has mm_cid=%d and will be the new leader.\n",
+ sched_getcpu(), curr_mm_cid);
+ for (i = 0; i < args->num_cpus; i++) {
+ if (args->tinfo[i] == pthread_self())
+ continue;
+ ret = pthread_join(args->tinfo[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("Error: failed to join thread");
+ abort();
+ }
+ }
+ pthread_barrier_destroy(args->barrier);
+ free(args->tinfo);
+ free(args->token);
+ free(args->barrier);
+ free(args->args_head);
+
+ for (i = 0; i < MM_CID_COMPACT_TIMEOUT; i++) {
+ curr_mm_cid = rseq_current_mm_cid();
+ printf_verbose("run %d: mm_cid=%d on cpu%d.\n", i,
+ curr_mm_cid, sched_getcpu());
+ if (curr_mm_cid == 0)
+ exit(EXIT_SUCCESS);
+ usleep(RUNNER_PERIOD);
+ }
+ exit(EXIT_FAILURE);
+ }
+ printf_verbose("cpu%d has mm_cid=%d and is going to terminate.\n",
+ sched_getcpu(), curr_mm_cid);
+ pthread_exit(NULL);
+}
+
+int test_mm_cid_compaction(void)
+{
+ cpu_set_t affinity;
+ int i, j, ret = 0, num_threads;
+ pthread_t *tinfo;
+ pthread_mutex_t *token;
+ pthread_barrier_t *barrier;
+ struct thread_args *args;
+
+ sched_getaffinity(0, sizeof(affinity), &affinity);
+ num_threads = CPU_COUNT(&affinity);
+ tinfo = calloc(num_threads, sizeof(*tinfo));
+ if (!tinfo) {
+ perror("Error: failed to allocate tinfo");
+ return -1;
+ }
+ args = calloc(num_threads, sizeof(*args));
+ if (!args) {
+ perror("Error: failed to allocate args");
+ ret = -1;
+ goto out_free_tinfo;
+ }
+ token = malloc(sizeof(*token));
+ if (!token) {
+ perror("Error: failed to allocate token");
+ ret = -1;
+ goto out_free_args;
+ }
+ barrier = malloc(sizeof(*barrier));
+ if (!barrier) {
+ perror("Error: failed to allocate barrier");
+ ret = -1;
+ goto out_free_token;
+ }
+ if (num_threads == 1) {
+ fprintf(stderr, "Cannot test on a single cpu. "
+ "Skipping mm_cid_compaction test.\n");
+ /* only skipping the test, this is not a failure */
+ goto out_free_barrier;
+ }
+ pthread_mutex_init(token, NULL);
+ ret = pthread_barrier_init(barrier, NULL, num_threads);
+ if (ret) {
+ errno = ret;
+ perror("Error: failed to initialise barrier");
+ goto out_free_barrier;
+ }
+ for (i = 0, j = 0; i < CPU_SETSIZE && j < num_threads; i++) {
+ if (!CPU_ISSET(i, &affinity))
+ continue;
+ args[j].num_cpus = num_threads;
+ args[j].tinfo = tinfo;
+ args[j].token = token;
+ args[j].barrier = barrier;
+ args[j].cpu = i;
+ args[j].args_head = args;
+ if (!j) {
+ /* The first thread is the main one */
+ tinfo[0] = pthread_self();
+ ++j;
+ continue;
+ }
+ ret = pthread_create(&tinfo[j], NULL, thread_runner, &args[j]);
+ if (ret) {
+ errno = ret;
+ perror("Error: failed to create thread");
+ abort();
+ }
+ ++j;
+ }
+ printf_verbose("Started %d threads.\n", num_threads);
+
+ /* Also main thread will terminate if it is not selected as leader */
+ thread_runner(&args[0]);
+
+ /* only reached in case of errors */
+out_free_barrier:
+ free(barrier);
+out_free_token:
+ free(token);
+out_free_args:
+ free(args);
+out_free_tinfo:
+ free(tinfo);
+
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ if (!rseq_mm_cid_available()) {
+ fprintf(stderr, "Error: rseq_mm_cid unavailable\n");
+ return -1;
+ }
+ if (test_mm_cid_compaction())
+ return -1;
+ return 0;
+}
--
2.48.1
Replace comma between expressions with semicolons.
Using a ',' in place of a ';' can have unintended side effects.
Although that is not the case here, it is seems best to use ';'
unless ',' is intended.
Found by inspection.
No functional change intended.
Compile tested only.
Signed-off-by: Chen Ni <nichen(a)iscas.ac.cn>
---
tools/testing/selftests/bpf/prog_tests/fd_array.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
index a1d52e73fb16..9add890c2d37 100644
--- a/tools/testing/selftests/bpf/prog_tests/fd_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -83,8 +83,8 @@ static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *ma
int err;
memset(&info, 0, len);
- info.nr_map_ids = *nr_map_ids,
- info.map_ids = ptr_to_u64(map_ids),
+ info.nr_map_ids = *nr_map_ids;
+ info.map_ids = ptr_to_u64(map_ids);
err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
--
2.25.1
Notable changes since v20:
* removed newline at the end of message in NL_SET_ERR_MSG_FMT_MOD
* dropped udp_init() and related build_protos() and directly use
.encap_destroy [instead of overriding sk->close()]
* defered peer_del() call to worker in case of transport errors, as
we may be in non-sleepable context
* used kfree() instead of kfree_rcu() when releasing socket as we
just invoked synchronize_rcu()
* fix comment in ovpn_tcp_parse()
* invoked peer->tcp.sk_cb.prot->release_cb instead of explicitly
calling tcp_release_cb. This way we don't need to export it again
* moved call to peer_put() after call to peer->tcp.sk_cb.prot->close
* moved switch(mode) inside ovpn_peers_free()
* simplified skip logic in ovpn_peers_free()
* fixed check to avoid passing a negative delay to keepalive's
schedule_work()
Please note that some patches were already reviewed/tested by a few
people. These patches have retained the tags as they have hardly been
touched.
The latest code can also be found at:
https://github.com/OpenVPN/ovpn-net-next
Thanks a lot!
Best Regards,
Antonio Quartulli
OpenVPN Inc.
To: netdev(a)vger.kernel.org
To: Eric Dumazet <edumazet(a)google.com>
To: Jakub Kicinski <kuba(a)kernel.org>
To: Paolo Abeni <pabeni(a)redhat.com>
To: Donald Hunter <donald.hunter(a)gmail.com>
To: Antonio Quartulli <antonio(a)openvpn.net>
To: Shuah Khan <shuah(a)kernel.org>
To: sd(a)queasysnail.net
To: ryazanov.s.a(a)gmail.com
To: Andrew Lunn <andrew+netdev(a)lunn.ch>
Cc: Simon Horman <horms(a)kernel.org>
Cc: linux-kernel(a)vger.kernel.org
Cc: linux-kselftest(a)vger.kernel.org
Cc: Xiao Liang <shaw.leon(a)gmail.com>
Signed-off-by: Antonio Quartulli <antonio(a)openvpn.net>
---
Antonio Quartulli (24):
net: introduce OpenVPN Data Channel Offload (ovpn)
ovpn: add basic netlink support
ovpn: add basic interface creation/destruction/management routines
ovpn: keep carrier always on for MP interfaces
ovpn: introduce the ovpn_peer object
ovpn: introduce the ovpn_socket object
ovpn: implement basic TX path (UDP)
ovpn: implement basic RX path (UDP)
ovpn: implement packet processing
ovpn: store tunnel and transport statistics
ovpn: implement TCP transport
skb: implement skb_send_sock_locked_with_flags()
ovpn: add support for MSG_NOSIGNAL in tcp_sendmsg
ovpn: implement multi-peer support
ovpn: implement peer lookup logic
ovpn: implement keepalive mechanism
ovpn: add support for updating local UDP endpoint
ovpn: add support for peer floating
ovpn: implement peer add/get/dump/delete via netlink
ovpn: implement key add/get/del/swap via netlink
ovpn: kill key and notify userspace in case of IV exhaustion
ovpn: notify userspace when a peer is deleted
ovpn: add basic ethtool support
testing/selftests: add test tool and scripts for ovpn module
Documentation/netlink/specs/ovpn.yaml | 371 +++
Documentation/netlink/specs/rt_link.yaml | 16 +
MAINTAINERS | 11 +
drivers/net/Kconfig | 15 +
drivers/net/Makefile | 1 +
drivers/net/ovpn/Makefile | 22 +
drivers/net/ovpn/bind.c | 55 +
drivers/net/ovpn/bind.h | 101 +
drivers/net/ovpn/crypto.c | 211 ++
drivers/net/ovpn/crypto.h | 145 ++
drivers/net/ovpn/crypto_aead.c | 408 ++++
drivers/net/ovpn/crypto_aead.h | 33 +
drivers/net/ovpn/io.c | 462 ++++
drivers/net/ovpn/io.h | 34 +
drivers/net/ovpn/main.c | 339 +++
drivers/net/ovpn/main.h | 14 +
drivers/net/ovpn/netlink-gen.c | 213 ++
drivers/net/ovpn/netlink-gen.h | 41 +
drivers/net/ovpn/netlink.c | 1249 ++++++++++
drivers/net/ovpn/netlink.h | 18 +
drivers/net/ovpn/ovpnpriv.h | 57 +
drivers/net/ovpn/peer.c | 1352 +++++++++++
drivers/net/ovpn/peer.h | 163 ++
drivers/net/ovpn/pktid.c | 129 ++
drivers/net/ovpn/pktid.h | 87 +
drivers/net/ovpn/proto.h | 118 +
drivers/net/ovpn/skb.h | 61 +
drivers/net/ovpn/socket.c | 244 ++
drivers/net/ovpn/socket.h | 49 +
drivers/net/ovpn/stats.c | 21 +
drivers/net/ovpn/stats.h | 47 +
drivers/net/ovpn/tcp.c | 592 +++++
drivers/net/ovpn/tcp.h | 36 +
drivers/net/ovpn/udp.c | 442 ++++
drivers/net/ovpn/udp.h | 25 +
include/linux/skbuff.h | 2 +
include/uapi/linux/if_link.h | 15 +
include/uapi/linux/ovpn.h | 110 +
include/uapi/linux/udp.h | 1 +
net/core/skbuff.c | 18 +-
net/ipv6/af_inet6.c | 1 +
net/ipv6/udp.c | 1 +
tools/testing/selftests/Makefile | 1 +
tools/testing/selftests/net/ovpn/.gitignore | 2 +
tools/testing/selftests/net/ovpn/Makefile | 31 +
tools/testing/selftests/net/ovpn/common.sh | 92 +
tools/testing/selftests/net/ovpn/config | 10 +
tools/testing/selftests/net/ovpn/data64.key | 5 +
tools/testing/selftests/net/ovpn/ovpn-cli.c | 2395 ++++++++++++++++++++
tools/testing/selftests/net/ovpn/tcp_peers.txt | 5 +
.../testing/selftests/net/ovpn/test-chachapoly.sh | 9 +
.../selftests/net/ovpn/test-close-socket-tcp.sh | 9 +
.../selftests/net/ovpn/test-close-socket.sh | 45 +
tools/testing/selftests/net/ovpn/test-float.sh | 9 +
tools/testing/selftests/net/ovpn/test-tcp.sh | 9 +
tools/testing/selftests/net/ovpn/test.sh | 113 +
tools/testing/selftests/net/ovpn/udp_peers.txt | 5 +
57 files changed, 10065 insertions(+), 5 deletions(-)
---
base-commit: fb05579a176f7bccc8d279665fc0e46dfed43dfb
change-id: 20250304-b4-ovpn-tmp-153379e78603
Best regards,
--
Antonio Quartulli <antonio(a)openvpn.net>
┌────────────┐ ┌───────────────────────────────────┐ ┌────────────────┐
│ │ │ │ │ │
│ │ │ PCI Endpoint │ │ PCI Host │
│ │ │ │ │ │
│ │◄──┤ 1.platform_msi_domain_alloc_irqs()│ │ │
│ │ │ │ │ │
│ MSI ├──►│ 2.write_msi_msg() ├──►├─BAR<n> │
│ Controller │ │ update doorbell register address│ │ │
│ │ │ for BAR │ │ │
│ │ │ │ │ 3. Write BAR<n>│
│ │◄──┼───────────────────────────────────┼───┤ │
│ │ │ │ │ │
│ ├──►│ 4.Irq Handle │ │ │
│ │ │ │ │ │
│ │ │ │ │ │
└────────────┘ └───────────────────────────────────┘ └────────────────┘
This patches based on old https://lore.kernel.org/imx/20221124055036.1630573-1-Frank.Li@nxp.com/
Original patch only target to vntb driver. But actually it is common
method.
This patches add new API to pci-epf-core, so any EP driver can use it.
Previous v2 discussion here.
https://lore.kernel.org/imx/20230911220920.1817033-1-Frank.Li@nxp.com/
Changes in v15:
- rebase to v6.14-rc1
- fix build issue find by kernel test robot
- Link to v14: https://lore.kernel.org/r/20250207-ep-msi-v14-0-9671b136f2b8@nxp.com
Changes in v14:
Marc Zyngier raised concerns about adding DOMAIN_BUS_DEVICE_PCI_EP_MSI. As
a result, the approach has been reverted to the v9 method. However, there
are several improvements:
MSI now supports msi-map in addition to msi-parent.
- The struct device: id is used as the endpoint function (EPF) device
identity to map to the stream ID (sideband information).
- The EPC device tree source (DTS) utilizes msi-map to provide such
information.
- The EPF device's of_node is set to the EPC controller’s node. This
approach is commonly used for multi-function device (MFD) platform child
devices, allowing them to inherit properties from the MFD device’s DTS,
such as reset-cells and gpio-cells. This method is well-suited for the
current case, as the EPF is inherently created/binded to the EPC and
should inherit the EPC’s DTS node properties.
Additionally:
Since the basic IMX95 LUT support has already been merged into the
mainline, a DTS and driver increment patch is added to complete the
solution. The patch is rebased onto the latest linux-next tree and
aligned with the new pcitest framework.
- Link to v13: https://lore.kernel.org/r/20241218-ep-msi-v13-0-646e2192dc24@nxp.com
Changes in v13:
- Change to use DOMAIN_BUS_PCI_DEVICE_EP_MSI
- Change request id as func | vfunc << 3
- Remove IRQ_DOMAIN_MSI_IMMUTABLE
Thomas Gleixner:
I hope capture all your points in review comments. If missed, let me know.
- Link to v12: https://lore.kernel.org/r/20241211-ep-msi-v12-0-33d4532fa520@nxp.com
Changes in v12:
- Change to use IRQ_DOMAIN_MSI_IMMUTABLE and add help function
irq_domain_msi_is_immuatble().
- split PCI: endpoint: pci-ep-msi: Add MSI address/data pair mutable check to 3 patches
- Link to v11: https://lore.kernel.org/r/20241209-ep-msi-v11-0-7434fa8397bd@nxp.com
Changes in v11:
- Change to use MSI_FLAG_MSG_IMMUTABLE
- Link to v10: https://lore.kernel.org/r/20241204-ep-msi-v10-0-87c378dbcd6d@nxp.com
Changes in v10:
Thomas Gleixner:
There are big change in pci-ep-msi.c. I am sure if go on the
corrent path. The key improvement is remove only 1 function devices's
limitation.
I use new patch for imutable check, which relative additional
feature compared to base enablement patch.
- Remove patch Add msi_remove_device_irq_domain() in platform_device_msi_free_irqs_all()
- Add new patch irqchip/gic-v3-its: Avoid overwriting msi_prepare callback if provided by msi_domain_info
- Remove only support 1 endpoint function limiation.
- Create one MSI domain for each endpoint function devices.
- Use "msi-map" in pci ep controler node, instead of of msi-parent. first
argument is
(func_no << 8 | vfunc_no)
- Link to v9: https://lore.kernel.org/r/20241203-ep-msi-v9-0-a60dbc3f15dd@nxp.com
Changes in v9
- Add patch platform-msi: Add msi_remove_device_irq_domain() in platform_device_msi_free_irqs_all()
- Remove patch PCI: endpoint: Add pci_epc_get_fn() API for customizable filtering
- Remove API pci_epf_align_inbound_addr_lo_hi
- Move doorbell_alloc in to doorbell_enable function.
- Link to v8: https://lore.kernel.org/r/20241116-ep-msi-v8-0-6f1f68ffd1bb@nxp.com
Changes in v8:
- update helper function name to pci_epf_align_inbound_addr()
- Link to v7: https://lore.kernel.org/r/20241114-ep-msi-v7-0-d4ac7aafbd2c@nxp.com
Changes in v7:
- Add helper function pci_epf_align_addr();
- Link to v6: https://lore.kernel.org/r/20241112-ep-msi-v6-0-45f9722e3c2a@nxp.com
Changes in v6:
- change doorbell_addr to doorbell_offset
- use round_down()
- add Niklas's test by tag
- rebase to pci/endpoint
- Link to v5: https://lore.kernel.org/r/20241108-ep-msi-v5-0-a14951c0d007@nxp.com
Changes in v5:
- Move request_irq to epf test function driver for more flexiable user case
- Add fixed size bar handler
- Some minor improvememtn to see each patches's changelog.
- Link to v4: https://lore.kernel.org/r/20241031-ep-msi-v4-0-717da2d99b28@nxp.com
Changes in v4:
- Remove patch genirq/msi: Add cleanup guard define for msi_lock_descs()/msi_unlock_descs()
- Use new method to avoid compatible problem.
Add new command DOORBELL_ENABLE and DOORBELL_DISABLE.
pcitest -B send DOORBELL_ENABLE first, EP test function driver try to
remap one of BAR_N (except test register bar) to ITS MSI MMIO space. Old
driver don't support new command, so failure return, not side effect.
After test, DOORBELL_DISABLE command send out to recover original map, so
pcitest bar test can pass as normal.
- Other detail change see each patches's change log
- Link to v3: https://lore.kernel.org/r/20241015-ep-msi-v3-0-cedc89a16c1a@nxp.com
Change from v2 to v3
- Fixed manivannan's comments
- Move common part to pci-ep-msi.c and pci-ep-msi.h
- rebase to 6.12-rc1
- use RevID to distingiush old version
mkdir /sys/kernel/config/pci_ep/functions/pci_epf_test/func1
echo 16 > /sys/kernel/config/pci_ep/functions/pci_epf_test/func1/msi_interrupts
echo 0x080c > /sys/kernel/config/pci_ep/functions/pci_epf_test/func1/deviceid
echo 0x1957 > /sys/kernel/config/pci_ep/functions/pci_epf_test/func1/vendorid
echo 1 > /sys/kernel/config/pci_ep/functions/pci_epf_test/func1/revid
^^^^^^ to enable platform msi support.
ln -s /sys/kernel/config/pci_ep/functions/pci_epf_test/func1 /sys/kernel/config/pci_ep/controllers/4c380000.pcie-ep
- use new device ID, which identify support doorbell to avoid broken
compatility.
Enable doorbell support only for PCI_DEVICE_ID_IMX8_DB, while other devices
keep the same behavior as before.
EP side RC with old driver RC with new driver
PCI_DEVICE_ID_IMX8_DB no probe doorbell enabled
Other device ID doorbell disabled* doorbell disabled*
* Behavior remains unchanged.
Change from v1 to v2
- Add missed patch for endpont/pci-epf-test.c
- Move alloc and free to epc driver from epf.
- Provide general help function for EPC driver to alloc platform msi irq.
- Fixed manivannan's comments.
Signed-off-by: Frank Li <Frank.Li(a)nxp.com>
---
Frank Li (15):
platform-msi: Add msi_remove_device_irq_domain() in platform_device_msi_free_irqs_all()
irqdomain: Add IRQ_DOMAIN_FLAG_MSI_IMMUTABLE and irq_domain_is_msi_immutable()
irqchip/gic-v3-its: Set IRQ_DOMAIN_FLAG_MSI_IMMUTABLE for ITS
irqchip/gic-v3-its: Add support for device tree msi-map and msi-mask
PCI: endpoint: Set ID and of_node for function driver
PCI: endpoint: Add RC-to-EP doorbell support using platform MSI controller
PCI: endpoint: pci-ep-msi: Add MSI address/data pair mutable check
PCI: endpoint: Add pci_epf_align_inbound_addr() helper for address alignment
PCI: endpoint: pci-epf-test: Add doorbell test support
misc: pci_endpoint_test: Add doorbell test case
selftests: pci_endpoint: Add doorbell test case
pci: imx6: Add helper function imx_pcie_add_lut_by_rid()
pci: imx6: Add LUT setting for MSI/IOMMU in Endpoint mode
arm64: dts: imx95: Add msi-map for pci-ep device
arm64: dts: imx95-19x19-evk: Add PCIe1 endpoint function overlay file
arch/arm64/boot/dts/freescale/Makefile | 3 +
.../dts/freescale/imx95-19x19-evk-pcie1-ep.dtso | 21 ++++
arch/arm64/boot/dts/freescale/imx95.dtsi | 1 +
drivers/base/platform-msi.c | 1 +
drivers/irqchip/irq-gic-v3-its-msi-parent.c | 8 ++
drivers/irqchip/irq-gic-v3-its.c | 2 +-
drivers/misc/pci_endpoint_test.c | 81 +++++++++++++
drivers/pci/controller/dwc/pci-imx6.c | 25 ++--
drivers/pci/endpoint/Makefile | 1 +
drivers/pci/endpoint/functions/pci-epf-test.c | 132 +++++++++++++++++++++
drivers/pci/endpoint/pci-ep-msi.c | 90 ++++++++++++++
drivers/pci/endpoint/pci-epf-core.c | 48 ++++++++
include/linux/irqdomain.h | 7 ++
include/linux/pci-ep-msi.h | 28 +++++
include/linux/pci-epf.h | 21 ++++
include/uapi/linux/pcitest.h | 1 +
.../selftests/pci_endpoint/pci_endpoint_test.c | 25 ++++
17 files changed, 486 insertions(+), 9 deletions(-)
---
base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b
change-id: 20241010-ep-msi-8b4cab33b1be
Best regards,
---
Frank Li <Frank.Li(a)nxp.com>
The first fixes setting incorrect skb->truesize.
When xdp-mb prog returns XDP_PASS, skb is allocated and initialized.
Currently, The truesize is calculated as BNXT_RX_PAGE_SIZE *
sinfo->nr_frags, but sinfo->nr_frags is flushed by napi_build_skb().
So, it stores sinfo before calling napi_build_skb() and then use it
for calculate truesize.
The second fixes kernel panic in the bnxt_queue_mem_alloc().
The bnxt_queue_mem_alloc() accesses rx ring descriptor.
rx ring descriptors are allocated when the interface is up and it's
freed when the interface is down.
So, if bnxt_queue_mem_alloc() is called when the interface is down,
kernel panic occurs.
This patch makes the bnxt_queue_mem_alloc() return -ENETDOWN if rx ring
descriptors are not allocated.
The third patch fixes kernel panic in the bnxt_queue_{start | stop}().
When a queue is restarted bnxt_queue_{start | stop}() are called.
These functions set MRU to 0 to stop packet flow and then to set up the
remaining things.
MRU variable is a member of vnic_info[] the first vnic_info is for
default and the second is for ntuple.
The first vnic_info is always allocated when interface is up, but the
second is allocated only when ntuple is enabled.
(ethtool -K eth0 ntuple <on | off>).
Currently, the bnxt_queue_{start | stop}() access
vnic_info[BNXT_VNIC_NTUPLE] regardless of whether ntuple is enabled or
not.
So kernel panic occurs.
This patch make the bnxt_queue_{start | stop}() use bp->nr_vnics instead
of BNXT_VNIC_NTUPLE.
The fourth patch fixes a warning due to checksum state.
The bnxt_rx_pkt() checks whether skb->ip_summed is not CHECKSUM_NONE
before updating ip_summed. if ip_summed is not CHECKSUM_NONE, it WARNS
about it. However, the bnxt_xdp_build_skb() is called in XDP-MB-PASS
path and it updates ip_summed earlier than bnxt_rx_pkt().
So, in the XDP-MB-PASS path, the bnxt_rx_pkt() always warns about
checksum.
Updating ip_summed at the bnxt_xdp_build_skb() is unnecessary and
duplicate, so it is removed.
The fifth patch fixes a kernel panic in the
bnxt_get_queue_stats{rx | tx}().
The bnxt_get_queue_stats{rx | tx}() callback functions are called when
a queue is resetting.
These internally access rx and tx rings without null check, but rings
are allocated and initialized when the interface is up.
So, these functions are called when the interface is down, it
occurs a kernel panic.
The sixth patch fixes memory leak in queue reset logic.
When a queue is resetting, tpa_info is allocated for the new queue and
tpa_info for an old queue is not used anymore.
So it should be freed, but not.
The seventh patch makes net_devmem_unbind_dmabuf() ignore -ENETDOWN.
When devmem socket is closed, net_devmem_unbind_dmabuf() is called to
unbind/release resources.
If interface is down, the driver returns -ENETDOWN.
The -ENETDOWN return value is not an actual error, because the interface
will release resources when the interface is down.
So, net_devmem_unbind_dmabuf() needs to ignore -ENETDOWN.
The last patch adds XDP testcases to
tools/testing/selftests/drivers/net/ping.py.
v3:
- Copy nr_frags instead of full copy. (1/8)
- Add Review tags from Somnath. (3/8)
- Add new patch for fixing kernel panic in the
bnxt_get_queue_stats{rx | tx}(). (5/8)
- Add new patch for fixing memory leak in queue reset. (6/8)
v2:
- Do not use num_frags in the bnxt_xdp_build_skb(). (1/6)
- Add Review tags from Somnath and Jakub. (2/6)
- Add new patch for fixing checksum warning. (4/6)
- Add new patch for fixing warning in net_devmem_unbind_dmabuf(). (5/6)
- Add new XDP testcases to ping.py (6/6)
Taehee Yoo (8):
eth: bnxt: fix truesize for mb-xdp-pass case
eth: bnxt: return fail if interface is down in bnxt_queue_mem_alloc()
eth: bnxt: do not use BNXT_VNIC_NTUPLE unconditionally in queue
restart logic
eth: bnxt: do not update checksum in bnxt_xdp_build_skb()
eth: bnxt: fix kernel panic in the bnxt_get_queue_stats{rx | tx}
eth: bnxt: fix memory leak in queue reset
net: devmem: do not WARN conditionally after netdev_rx_queue_restart()
selftests: drv-net: add xdp cases for ping.py
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 25 ++-
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 13 +-
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 3 +-
net/core/devmem.c | 4 +-
tools/testing/selftests/drivers/net/ping.py | 200 ++++++++++++++++--
.../testing/selftests/net/lib/xdp_dummy.bpf.c | 6 +
6 files changed, 220 insertions(+), 31 deletions(-)
--
2.34.1
Hello,
While trying to implement an eBPF gatekeeper program, we ran into an
issue whereas the LSM hooks are missing some relevant data.
Certain subcommands passed to the bpf() syscall can be invoked from
either the kernel or userspace. Additionally, some fields in the
bpf_attr struct contain pointers, and depending on where the
subcommand was invoked, they could point to either user or kernel
memory. One example of this is the bpf_prog_load subcommand and its
fd_array. This data is made available and used by the verifier but not
made available to the LSM subsystem. This patchset simply exposes that
information to applicable LSM hooks.
Change list:
- v5 -> v6
- fix regression caused by is_kernel renaming
- simplify test logic
- v4 -> v5
- merge v4 selftest breakout patch back into a single patch
- change "is_kernel" to "kernel"
- add selftest using new kernel flag
- v3 -> v4
- split out selftest changes into a separate patch
- v2 -> v3
- reorder params so that the new boolean flag is the last param
- fixup function signatures in bpf selftests
- v1 -> v2
- Pass a boolean flag in lieu of bpfptr_t
Revisions:
- v5
https://lore.kernel.org/bpf/20250307213651.3065714-1-bboscaccy@linux.micros…
- v4
https://lore.kernel.org/bpf/20250304203123.3935371-1-bboscaccy@linux.micros…
- v3
https://lore.kernel.org/bpf/20250303222416.3909228-1-bboscaccy@linux.micros…
- v2
https://lore.kernel.org/bpf/20250228165322.3121535-1-bboscaccy@linux.micros…
- v1
https://lore.kernel.org/bpf/20250226003055.1654837-1-bboscaccy@linux.micros…
Blaise Boscaccy (2):
security: Propagate caller information in bpf hooks
selftests/bpf: Add a kernel flag test for LSM bpf hook
include/linux/lsm_hook_defs.h | 6 +--
include/linux/security.h | 12 +++---
kernel/bpf/syscall.c | 10 ++---
security/security.c | 15 ++++---
security/selinux/hooks.c | 6 +--
.../selftests/bpf/prog_tests/kernel_flag.c | 43 +++++++++++++++++++
.../selftests/bpf/progs/rcu_read_lock.c | 3 +-
.../bpf/progs/test_cgroup1_hierarchy.c | 4 +-
.../selftests/bpf/progs/test_kernel_flag.c | 28 ++++++++++++
.../bpf/progs/test_kfunc_dynptr_param.c | 6 +--
.../selftests/bpf/progs/test_lookup_key.c | 2 +-
.../selftests/bpf/progs/test_ptr_untrusted.c | 2 +-
.../bpf/progs/test_task_under_cgroup.c | 2 +-
.../bpf/progs/test_verify_pkcs7_sig.c | 2 +-
14 files changed, 108 insertions(+), 33 deletions(-)
create mode 100644 tools/testing/selftests/bpf/prog_tests/kernel_flag.c
create mode 100644 tools/testing/selftests/bpf/progs/test_kernel_flag.c
--
2.48.1