From: Antoine Tenart atenart@kernel.org
[ Upstream commit 95eabdd207024312876d0ebed90b4c977e050e85 ]
Commit 2cfadb761d3d ("netfilter: conntrack: revisit gc autotuning") changed the eviction rescheduling to use the average expiry of scanned entries (within 1-60s) by doing:
for (...) { expires = clamp(nf_ct_expires(tmp), ...); next_run += expires; next_run /= 2; }
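The issue is that the above makes the average ('next_run' here) depend more on the last expiration values than on the first ones (for sets of more than 2 entries): every halving step gives the newest sample a weight of 1/2, so the weight of earlier samples decays exponentially. Depending on the expiration values used to compute the average, the result can be quite different from what is expected. To fix this, we can compute an incremental mean instead: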
for (...) { expires = clamp(nf_ct_expires(tmp), ...); next_run += (expires - next_run) / ++count; }
Fixes: 2cfadb761d3d ("netfilter: conntrack: revisit gc autotuning") Cc: Florian Westphal fw@strlen.de Signed-off-by: Antoine Tenart atenart@kernel.org Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Sasha Levin sashal@kernel.org --- net/netfilter/nf_conntrack_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 31399c53dfb1..ee72da164190 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -67,6 +67,7 @@ struct conntrack_gc_work { struct delayed_work dwork; u32 next_bucket; u32 avg_timeout; + u32 count; u32 start_time; bool exiting; bool early_drop; @@ -1439,6 +1440,7 @@ static void gc_worker(struct work_struct *work) unsigned int expired_count = 0; unsigned long next_run; s32 delta_time; + long count;
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
@@ -1448,10 +1450,12 @@ static void gc_worker(struct work_struct *work)
if (i == 0) { gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT; + gc_work->count = 1; gc_work->start_time = start_time; }
next_run = gc_work->avg_timeout; + count = gc_work->count;
end_time = start_time + GC_SCAN_MAX_DURATION;
@@ -1471,8 +1475,8 @@ static void gc_worker(struct work_struct *work)
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { struct nf_conntrack_net *cnet; - unsigned long expires; struct net *net; + long expires;
tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -1486,6 +1490,7 @@ static void gc_worker(struct work_struct *work)
gc_work->next_bucket = i; gc_work->avg_timeout = next_run; + gc_work->count = count;
delta_time = nfct_time_stamp - gc_work->start_time;
@@ -1501,8 +1506,8 @@ static void gc_worker(struct work_struct *work) }
expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP); + expires = (expires - (long)next_run) / ++count; next_run += expires; - next_run /= 2u;
if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) continue; @@ -1540,6 +1545,7 @@ static void gc_worker(struct work_struct *work) delta_time = nfct_time_stamp - end_time; if (delta_time > 0 && i < hashsz) { gc_work->avg_timeout = next_run; + gc_work->count = count; gc_work->next_bucket = i; next_run = 0; goto early_exit;