Hi Greg, Sasha,
This batch contains a backport for recent fixes already upstream for 4.19.x.
The following list shows the backported patches, I am using original commit IDs for reference:
1) b53c11664250 ("netfilter: nf_tables: set element extended ACK reporting support")
2) 7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set element timeout")
3) cff3bd012a95 ("netfilter: nf_tables: prefer nft_chain_validate")
Please, apply, Thanks
Florian Westphal (1): netfilter: nf_tables: prefer nft_chain_validate
Pablo Neira Ayuso (2): netfilter: nf_tables: set element extended ACK reporting support netfilter: nf_tables: use timestamp to check for set element timeout
include/net/netfilter/nf_tables.h | 21 ++++- net/netfilter/nf_tables_api.c | 128 ++++++------------------------ net/netfilter/nft_set_hash.c | 8 +- net/netfilter/nft_set_rbtree.c | 6 +- 4 files changed, 53 insertions(+), 110 deletions(-)
commit b53c116642502b0c85ecef78bff4f826a7dd4145 upstream.
Report the element that causes problems via netlink extended ACK for set element commands.
Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org --- net/netfilter/nf_tables_api.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f2611406af14..b64d3cd97ee7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4523,8 +4523,10 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_get_set_elem(&ctx, set, attr); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); break; + } }
return err; @@ -4902,8 +4904,10 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); return err; + } }
if (nft_net->validate_state == NFT_VALIDATE_DO) @@ -5103,9 +5107,10 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); break; - + } set->ndeact++; } return err;
commit 7395dfacfff65e9938ac0889dafa1ab01e987d15 upstream
Add a timestamp field at the beginning of the transaction, store it in the nftables per-netns area.
Update set backend .insert, .deactivate and sync gc path to use the timestamp, this avoids that an element expires while control plane transaction is still unfinished.
.lookup and .update, which are used from packet path, still use the current time to check if the element has expired. And .get path and dump also since this runs lockless under rcu read size lock. Then, there is async gc which also needs to check the current time since it runs asynchronously from a workqueue.
[ NB: rbtree GC updates has been excluded because GC is asynchronous. ]
Fixes: c3e1b005ed1c ("netfilter: nf_tables: add set element timeout support") Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org --- include/net/netfilter/nf_tables.h | 21 +++++++++++++++++++-- net/netfilter/nf_tables_api.c | 1 + net/netfilter/nft_set_hash.c | 8 +++++++- net/netfilter/nft_set_rbtree.c | 6 ++++-- 4 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4a0f51c2b3b9..9eb7d7de590f 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -12,6 +12,7 @@ #include <linux/rhashtable.h> #include <net/netfilter/nf_flow_table.h> #include <net/netlink.h> +#include <net/netns/generic.h>
#define NFT_JUMP_STACK_SIZE 16
@@ -636,10 +637,16 @@ static inline struct nft_expr *nft_set_ext_expr(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_EXPR); }
-static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext, + u64 tstamp) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && - time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext)); + time_after_eq64(tstamp, *nft_set_ext_expiration(ext)); +} + +static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) +{ + return __nft_set_elem_expired(ext, get_jiffies_64()); }
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, @@ -1423,11 +1430,21 @@ struct nftables_pernet { struct list_head module_list; struct list_head notify_list; struct mutex commit_mutex; + u64 tstamp; unsigned int base_seq; u8 validate_state; unsigned int gc_seq; };
+extern unsigned int nf_tables_net_id; + +static inline u64 nft_net_tstamp(const struct net *net) +{ + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return nft_net->tstamp; +} + int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); __be64 nf_jiffies64_to_msecs(u64 input);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b64d3cd97ee7..c8a1f3f14384 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7365,6 +7365,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid) bool genid_ok;
mutex_lock(&nft_net->commit_mutex); + nft_net->tstamp = get_jiffies_64();
genid_ok = genid == 0 || nft_net->base_seq == genid; if (!genid_ok) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 5e562e7cd470..8e249e98aeea 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -41,6 +41,7 @@ struct nft_rhash_cmp_arg { const struct nft_set *set; const u32 *key; u8 genmask; + u64 tstamp; };
static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed) @@ -67,7 +68,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, return 1; if (nft_set_elem_is_dead(&he->ext)) return 1; - if (nft_set_elem_expired(&he->ext)) + if (__nft_set_elem_expired(&he->ext, x->tstamp)) return 1; if (!nft_set_elem_active(&he->ext, x->genmask)) return 1; @@ -91,6 +92,7 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_cur(net), .set = set, .key = key, + .tstamp = get_jiffies_64(), };
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); @@ -109,6 +111,7 @@ static void *nft_rhash_get(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_cur(net), .set = set, .key = elem->key.val.data, + .tstamp = get_jiffies_64(), };
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); @@ -132,6 +135,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, .genmask = NFT_GENMASK_ANY, .set = set, .key = key, + .tstamp = get_jiffies_64(), };
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); @@ -175,6 +179,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, + .tstamp = nft_net_tstamp(net), }; struct nft_rhash_elem *prev;
@@ -217,6 +222,7 @@ static void *nft_rhash_deactivate(const struct net *net, .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, + .tstamp = nft_net_tstamp(net), };
rcu_read_lock(); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index caddacc1d446..f5bec0e37c0d 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -318,6 +318,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); u8 cur_genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_next(net); + u64 tstamp = nft_net_tstamp(net); int d, err;
/* Descend the tree to search for an existing element greater than the @@ -365,7 +366,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, /* perform garbage collection to avoid bogus overlap reports * but skip new elements in this transaction. */ - if (nft_set_elem_expired(&rbe->ext) && + if (__nft_set_elem_expired(&rbe->ext, tstamp) && nft_set_elem_active(&rbe->ext, cur_genmask)) { err = nft_rbtree_gc_elem(set, priv, rbe); if (err < 0) @@ -540,6 +541,7 @@ static void *nft_rbtree_deactivate(const struct net *net, const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe, *this = elem->priv; u8 genmask = nft_genmask_next(net); + u64 tstamp = nft_net_tstamp(net); int d;
while (parent != NULL) { @@ -560,7 +562,7 @@ static void *nft_rbtree_deactivate(const struct net *net, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; - } else if (nft_set_elem_expired(&rbe->ext)) { + } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) { break; } else if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = parent->rb_left;
From: Florian Westphal fw@strlen.de
commit cff3bd012a9512ac5ed858d38e6ed65f6391008c upstream
nft_chain_validate already performs loop detection because a cycle will result in a call stack overflow (ctx->level >= NFT_JUMP_STACK_SIZE).
It also follows maps via ->validate callback in nft_lookup, so there appears no reason to iterate the maps again.
nf_tables_check_loops() and all its helper functions can be removed. This improves ruleset load time significantly, from 23s down to 12s.
This also fixes a crash bug. Old loop detection code can result in unbounded recursion:
BUG: TASK stack guard page was hit at .... Oops: stack guard page: 0000 [#1] PREEMPT SMP KASAN CPU: 4 PID: 1539 Comm: nft Not tainted 6.10.0-rc5+ #1 [..]
with a suitable ruleset during validation of register stores.
I can't see any actual reason to attempt to check for this from nft_validate_register_store(), at this point the transaction is still in progress, so we don't have a full picture of the rule graph.
For nf-next it might make sense to either remove it or make this depend on table->validate_state in case we could catch an error earlier (for improved error reporting to userspace).
Fixes: 20a69341f2d0 ("netfilter: nf_tables: add netlink set API") Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org --- net/netfilter/nf_tables_api.c | 114 ++++------------------------------ 1 file changed, 13 insertions(+), 101 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c8a1f3f14384..a033c9baf58a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2698,6 +2698,15 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, nf_tables_rule_destroy(ctx, rule); }
+/** nft_chain_validate - loop detection and hook validation + * + * @ctx: context containing call depth and base chain + * @chain: chain to validate + * + * Walk through the rules of the given chain and chase all jumps/gotos + * and set lookups until either the jump limit is hit or all reachable + * chains have been validated. + */ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) { struct nft_expr *expr, *last; @@ -2716,6 +2725,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (!expr->ops->validate) continue;
+ /* This may call nft_chain_validate() recursively, + * callers that do so must increment ctx->level. + */ err = expr->ops->validate(ctx, expr, &data); if (err < 0) return err; @@ -7418,106 +7430,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain, } EXPORT_SYMBOL_GPL(nft_chain_validate_hooks);
-/* - * Loop detection - walk through the ruleset beginning at the destination chain - * of a new jump until either the source chain is reached (loop) or all - * reachable chains have been traversed. - * - * The loop check is performed whenever a new jump verdict is added to an - * expression or verdict map or a verdict map is bound to a new chain. - */ - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain); - -static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, - struct nft_set *set, - const struct nft_set_iter *iter, - struct nft_set_elem *elem) -{ - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); - const struct nft_data *data; - - if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && - *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) - return 0; - - data = nft_set_ext_data(ext); - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - return nf_tables_check_loops(ctx, data->verdict.chain); - default: - return 0; - } -} - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain) -{ - const struct nft_rule *rule; - const struct nft_expr *expr, *last; - struct nft_set *set; - struct nft_set_binding *binding; - struct nft_set_iter iter; - - if (ctx->chain == chain) - return -ELOOP; - - list_for_each_entry(rule, &chain->rules, list) { - nft_rule_for_each_expr(expr, last, rule) { - struct nft_immediate_expr *priv; - const struct nft_data *data; - int err; - - if (strcmp(expr->ops->type->name, "immediate")) - continue; - - priv = nft_expr_priv(expr); - if (priv->dreg != NFT_REG_VERDICT) - continue; - - data = &priv->data; - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - err = nf_tables_check_loops(ctx, - data->verdict.chain); - if (err < 0) - return err; - default: - break; - } - } - } - - list_for_each_entry(set, &ctx->table->sets, list) { - if (!nft_is_active_next(ctx->net, set)) - continue; - if (!(set->flags & NFT_SET_MAP) || - set->dtype != NFT_DATA_VERDICT) - continue; - - list_for_each_entry(binding, &set->bindings, list) { - if (!(binding->flags & NFT_SET_MAP) || - binding->chain != chain) - continue; - - iter.genmask = nft_genmask_next(ctx->net); - iter.skip = 0; - iter.count = 0; - iter.err = 0; - iter.fn = nf_tables_loop_check_setelem; - - set->ops->walk(ctx, set, &iter); - if (iter.err < 0) - return iter.err; - } - } - - return 0; -} - /** * nft_parse_u32_check - fetch u32 attribute and check for maximum value * @@ -7653,7 +7565,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, if (data != NULL && (data->verdict.code == NFT_GOTO || data->verdict.code == NFT_JUMP)) { - err = nf_tables_check_loops(ctx, data->verdict.chain); + err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; }
On Mon, Aug 12, 2024 at 12:29:22PM +0200, Pablo Neira Ayuso wrote:
Hi Greg, Sasha,
This batch contains a backport for recent fixes already upstream for 4.19.x.
The following list shows the backported patches, I am using original commit IDs for reference:
b53c11664250 ("netfilter: nf_tables: set element extended ACK reporting support")
7395dfacfff6 ("netfilter: nf_tables: use timestamp to check for set element timeout")
cff3bd012a95 ("netfilter: nf_tables: prefer nft_chain_validate")
Please, apply, Thanks
Now queued up, thanks.
greg k-h
linux-stable-mirror@lists.linaro.org