Hi Greg, Sasha,
This batch contains a backport for recent fixes already upstream for 6.1.x.
The following list shows the backported patches, I am using original commit IDs for reference:
1) 3c13725f43dc ("netfilter: nf_tables: bail out if stateful expression provides no .clone")
2) fa23e0d4b756 ("netfilter: nf_tables: allow clone callbacks to sleep")
3) cff3bd012a95 ("netfilter: nf_tables: prefer nft_chain_validate")
Please, apply, Thanks
Florian Westphal (2): netfilter: nf_tables: allow clone callbacks to sleep netfilter: nf_tables: prefer nft_chain_validate
Pablo Neira Ayuso (1): netfilter: nf_tables: bail out if stateful expression provides no .clone
include/net/netfilter/nf_tables.h | 4 +- net/netfilter/nf_tables_api.c | 172 ++++-------------------------- net/netfilter/nft_connlimit.c | 4 +- net/netfilter/nft_counter.c | 4 +- net/netfilter/nft_dynset.c | 2 +- net/netfilter/nft_last.c | 4 +- net/netfilter/nft_limit.c | 14 +-- net/netfilter/nft_quota.c | 4 +- 8 files changed, 42 insertions(+), 166 deletions(-)
commit 3c13725f43dcf43ad8a9bcd6a9f12add19a8f93e upstream.
All existing NFT_EXPR_STATEFUL provide a .clone interface, remove fallback to copy content of stateful expression since this is never exercised and bail out if .clone interface is not defined.
Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org --- net/netfilter/nf_tables_api.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d18b698139ca..85d7250a29a2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3123,14 +3123,13 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) { int err;
- if (src->ops->clone) { - dst->ops = src->ops; - err = src->ops->clone(dst, src); - if (err < 0) - return err; - } else { - memcpy(dst, src, src->ops->size); - } + if (WARN_ON_ONCE(!src->ops->clone)) + return -EINVAL; + + dst->ops = src->ops; + err = src->ops->clone(dst, src); + if (err < 0) + return err;
__module_get(src->ops->type->owner);
From: Florian Westphal fw@strlen.de
commit fa23e0d4b756d25829e124d6b670a4c6bbd4bf7e upstream.
Sven Auhagen reports transaction failures with following error: ./main.nft:13:1-26: Error: Could not process rule: Cannot allocate memory percpu: allocation failed, size=16 align=8 atomic=1, atomic alloc failed, no space left
This points to failing pcpu allocation with GFP_ATOMIC flag. However, transactions happen from user context and are allowed to sleep.
One case where we can call into percpu allocator with GFP_ATOMIC is nft_counter expression.
Normally this happens from control plane, so this could use GFP_KERNEL instead. But one use case, element insertion from packet path, needs to use GFP_ATOMIC allocations (nft_dynset expression).
At this time, .clone callbacks always use GFP_ATOMIC for this reason.
Add gfp_t argument to the .clone function and pass GFP_KERNEL or GFP_ATOMIC flag depending on context, this allows all clone memory allocations to sleep for the normal (transaction) case.
Cc: Sven Auhagen sven.auhagen@voleatech.de Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org --- include/net/netfilter/nf_tables.h | 4 ++-- net/netfilter/nf_tables_api.c | 8 ++++---- net/netfilter/nft_connlimit.c | 4 ++-- net/netfilter/nft_counter.c | 4 ++-- net/netfilter/nft_dynset.c | 2 +- net/netfilter/nft_last.c | 4 ++-- net/netfilter/nft_limit.c | 14 ++++++++------ net/netfilter/nft_quota.c | 4 ++-- 8 files changed, 23 insertions(+), 21 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 9a80d0251d8f..e365302fed95 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -387,7 +387,7 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) return (void *)expr->data; }
-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); +int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, const struct nft_expr *expr); @@ -889,7 +889,7 @@ struct nft_expr_ops { struct nft_regs *regs, const struct nft_pktinfo *pkt); int (*clone)(struct nft_expr *dst, - const struct nft_expr *src); + const struct nft_expr *src, gfp_t gfp); unsigned int size;
int (*init)(const struct nft_ctx *ctx, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 85d7250a29a2..8f8470ba6c37 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3119,7 +3119,7 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, return ERR_PTR(err); }
-int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) +int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src, gfp_t gfp) { int err;
@@ -3127,7 +3127,7 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) return -EINVAL;
dst->ops = src->ops; - err = src->ops->clone(dst, src); + err = src->ops->clone(dst, src, gfp); if (err < 0) return err;
@@ -6059,7 +6059,7 @@ int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, if (!expr) goto err_expr;
- err = nft_expr_clone(expr, set->exprs[i]); + err = nft_expr_clone(expr, set->exprs[i], GFP_KERNEL_ACCOUNT); if (err < 0) { kfree(expr); goto err_expr; @@ -6098,7 +6098,7 @@ static int nft_set_elem_expr_setup(struct nft_ctx *ctx,
for (i = 0; i < num_exprs; i++) { expr = nft_setelem_expr_at(elem_expr, elem_expr->size); - err = nft_expr_clone(expr, expr_array[i]); + err = nft_expr_clone(expr, expr_array[i], GFP_KERNEL_ACCOUNT); if (err < 0) goto err_elem_expr_setup;
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index d657f999a11b..793994622b87 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -209,12 +209,12 @@ static void nft_connlimit_destroy(const struct nft_ctx *ctx, nft_connlimit_do_destroy(ctx, priv); }
-static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_connlimit *priv_dst = nft_expr_priv(dst); struct nft_connlimit *priv_src = nft_expr_priv(src);
- priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC); + priv_dst->list = kmalloc(sizeof(*priv_dst->list), gfp); if (!priv_dst->list) return -ENOMEM;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index f4d3573e8782..b5fe7fe4b60d 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -225,7 +225,7 @@ static void nft_counter_destroy(const struct nft_ctx *ctx, nft_counter_do_destroy(priv); }
-static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_counter_percpu_priv *priv = nft_expr_priv(src); struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst); @@ -235,7 +235,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
nft_counter_fetch(priv, &total);
- cpu_stats = alloc_percpu_gfp(struct nft_counter, GFP_ATOMIC); + cpu_stats = alloc_percpu_gfp(struct nft_counter, gfp); if (cpu_stats == NULL) return -ENOMEM;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index a470e5f61284..953aba871f45 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -35,7 +35,7 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv,
for (i = 0; i < priv->num_exprs; i++) { expr = nft_setelem_expr_at(elem_expr, elem_expr->size); - if (nft_expr_clone(expr, priv->expr_array[i]) < 0) + if (nft_expr_clone(expr, priv->expr_array[i], GFP_ATOMIC) < 0) return -1;
elem_expr->size += priv->expr_array[i]->ops->size; diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index eaa54964cf23..2ff387b649cf 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -101,12 +101,12 @@ static void nft_last_destroy(const struct nft_ctx *ctx, kfree(priv->last); }
-static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_last_priv *priv_dst = nft_expr_priv(dst); struct nft_last_priv *priv_src = nft_expr_priv(src);
- priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC); + priv_dst->last = kzalloc(sizeof(*priv_dst->last), gfp); if (!priv_dst->last) return -ENOMEM;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 36ded7d43262..a263596da99a 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -150,7 +150,7 @@ static void nft_limit_destroy(const struct nft_ctx *ctx, }
static int nft_limit_clone(struct nft_limit_priv *priv_dst, - const struct nft_limit_priv *priv_src) + const struct nft_limit_priv *priv_src, gfp_t gfp) { priv_dst->tokens_max = priv_src->tokens_max; priv_dst->rate = priv_src->rate; @@ -158,7 +158,7 @@ static int nft_limit_clone(struct nft_limit_priv *priv_dst, priv_dst->burst = priv_src->burst; priv_dst->invert = priv_src->invert;
- priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC); + priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), gfp); if (!priv_dst->limit) return -ENOMEM;
@@ -222,14 +222,15 @@ static void nft_limit_pkts_destroy(const struct nft_ctx *ctx, nft_limit_destroy(ctx, &priv->limit); }
-static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src, + gfp_t gfp) { struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst); struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
priv_dst->cost = priv_src->cost;
- return nft_limit_clone(&priv_dst->limit, &priv_src->limit); + return nft_limit_clone(&priv_dst->limit, &priv_src->limit, gfp); }
static struct nft_expr_type nft_limit_type; @@ -280,12 +281,13 @@ static void nft_limit_bytes_destroy(const struct nft_ctx *ctx, nft_limit_destroy(ctx, priv); }
-static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src, + gfp_t gfp) { struct nft_limit_priv *priv_dst = nft_expr_priv(dst); struct nft_limit_priv *priv_src = nft_expr_priv(src);
- return nft_limit_clone(priv_dst, priv_src); + return nft_limit_clone(priv_dst, priv_src, gfp); }
static const struct nft_expr_ops nft_limit_bytes_ops = { diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 410a5fcf8830..ef8e7cdbd0e6 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -232,7 +232,7 @@ static void nft_quota_destroy(const struct nft_ctx *ctx, return nft_quota_do_destroy(ctx, priv); }
-static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src) +static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src, gfp_t gfp) { struct nft_quota *priv_dst = nft_expr_priv(dst); struct nft_quota *priv_src = nft_expr_priv(src); @@ -240,7 +240,7 @@ static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src) priv_dst->quota = priv_src->quota; priv_dst->flags = priv_src->flags;
- priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC); + priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), gfp); if (!priv_dst->consumed) return -ENOMEM;
From: Florian Westphal fw@strlen.de
commit cff3bd012a9512ac5ed858d38e6ed65f6391008c upstream.
nft_chain_validate already performs loop detection because a cycle will result in a call stack overflow (ctx->level >= NFT_JUMP_STACK_SIZE).
It also follows maps via ->validate callback in nft_lookup, so there appears no reason to iterate the maps again.
nf_tables_check_loops() and all its helper functions can be removed. This improves ruleset load time significantly, from 23s down to 12s.
This also fixes a crash bug. Old loop detection code can result in unbounded recursion:
BUG: TASK stack guard page was hit at .... Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN CPU: 4 PID: 1539 Comm: nft Not tainted 6.10.0-rc5+ #1 [..]
with a suitable ruleset during validation of register stores.
I can't see any actual reason to attempt to check for this from nft_validate_register_store(), at this point the transaction is still in progress, so we don't have a full picture of the rule graph.
For nf-next it might make sense to either remove it or make this depend on table->validate_state in case we could catch an error earlier (for improved error reporting to userspace).
Fixes: 20a69341f2d0 ("netfilter: nf_tables: add netlink set API") Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org --- net/netfilter/nf_tables_api.c | 151 +++------------------------------- 1 file changed, 13 insertions(+), 138 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8f8470ba6c37..10180d280e79 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3515,6 +3515,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r nf_tables_rule_destroy(ctx, rule); }
+/** nft_chain_validate - loop detection and hook validation + * + * @ctx: context containing call depth and base chain + * @chain: chain to validate + * + * Walk through the rules of the given chain and chase all jumps/gotos + * and set lookups until either the jump limit is hit or all reachable + * chains have been validated. + */ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) { struct nft_expr *expr, *last; @@ -3533,6 +3542,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (!expr->ops->validate) continue;
+ /* This may call nft_chain_validate() recursively, + * callers that do so must increment ctx->level. + */ err = expr->ops->validate(ctx, expr, &data); if (err < 0) return err; @@ -10190,143 +10202,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain, } EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
-/* - * Loop detection - walk through the ruleset beginning at the destination chain - * of a new jump until either the source chain is reached (loop) or all - * reachable chains have been traversed. - * - * The loop check is performed whenever a new jump verdict is added to an - * expression or verdict map or a verdict map is bound to a new chain. - */ - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain); - -static int nft_check_loops(const struct nft_ctx *ctx, - const struct nft_set_ext *ext) -{ - const struct nft_data *data; - int ret; - - data = nft_set_ext_data(ext); - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - ret = nf_tables_check_loops(ctx, data->verdict.chain); - break; - default: - ret = 0; - break; - } - - return ret; -} - -static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, - struct nft_set *set, - const struct nft_set_iter *iter, - struct nft_set_elem *elem) -{ - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); - - if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && - *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) - return 0; - - return nft_check_loops(ctx, ext); -} - -static int nft_set_catchall_loops(const struct nft_ctx *ctx, - struct nft_set *set) -{ - u8 genmask = nft_genmask_next(ctx->net); - struct nft_set_elem_catchall *catchall; - struct nft_set_ext *ext; - int ret = 0; - - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { - ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_set_elem_active(ext, genmask)) - continue; - - ret = nft_check_loops(ctx, ext); - if (ret < 0) - return ret; - } - - return ret; -} - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain) -{ - const struct nft_rule *rule; - const struct nft_expr *expr, *last; - struct nft_set *set; - struct nft_set_binding *binding; - struct nft_set_iter iter; - - if (ctx->chain == chain) - return -ELOOP; - - list_for_each_entry(rule, &chain->rules, list) { - nft_rule_for_each_expr(expr, last, rule) { - struct nft_immediate_expr *priv; - const struct nft_data *data; - int err; - - if (strcmp(expr->ops->type->name, "immediate")) - continue; - - priv = nft_expr_priv(expr); - if (priv->dreg != NFT_REG_VERDICT) - continue; - - data = &priv->data; - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - err = nf_tables_check_loops(ctx, - data->verdict.chain); - if (err < 0) - return err; - break; - default: - break; - } - } - } - - list_for_each_entry(set, &ctx->table->sets, list) { - if (!nft_is_active_next(ctx->net, set)) - continue; - if (!(set->flags & NFT_SET_MAP) || - set->dtype != NFT_DATA_VERDICT) - continue; - - list_for_each_entry(binding, &set->bindings, list) { - if (!(binding->flags & NFT_SET_MAP) || - binding->chain != chain) - continue; - - iter.genmask = nft_genmask_next(ctx->net); - iter.skip = 0; - iter.count = 0; - iter.err = 0; - iter.fn = nf_tables_loop_check_setelem; - - set->ops->walk(ctx, set, &iter); - if (!iter.err) - iter.err = nft_set_catchall_loops(ctx, set); - - if (iter.err < 0) - return iter.err; - } - } - - return 0; -} - /** * nft_parse_u32_check - fetch u32 attribute and check for maximum value * @@ -10439,7 +10314,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, if (data != NULL && (data->verdict.code == NFT_GOTO || data->verdict.code == NFT_JUMP)) { - err = nf_tables_check_loops(ctx, data->verdict.chain); + err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; }
On Mon, Aug 12, 2024 at 12:23:17PM +0200, Pablo Neira Ayuso wrote:
Hi Greg, Sasha,
This batch contains a backport for recent fixes already upstream for 6.1.x.
The following list shows the backported patches, I am using original commit IDs for reference:
3c13725f43dc ("netfilter: nf_tables: bail out if stateful expression provides no .clone")
fa23e0d4b756 ("netfilter: nf_tables: allow clone callbacks to sleep")
cff3bd012a95 ("netfilter: nf_tables: prefer nft_chain_validate")
Please, apply, Thanks
Florian Westphal (2): netfilter: nf_tables: allow clone callbacks to sleep netfilter: nf_tables: prefer nft_chain_validate
Pablo Neira Ayuso (1): netfilter: nf_tables: bail out if stateful expression provides no .clone
include/net/netfilter/nf_tables.h | 4 +- net/netfilter/nf_tables_api.c | 172 ++++-------------------------- net/netfilter/nft_connlimit.c | 4 +- net/netfilter/nft_counter.c | 4 +- net/netfilter/nft_dynset.c | 2 +- net/netfilter/nft_last.c | 4 +- net/netfilter/nft_limit.c | 14 +-- net/netfilter/nft_quota.c | 4 +- 8 files changed, 42 insertions(+), 166 deletions(-)
-- 2.30.2
All now queued up, thanks!
greg k-h
linux-stable-mirror@lists.linaro.org