On Mon, May 05, 2025 at 11:48:33AM +0200, chia-yu.chang@nokia-bell-labs.com wrote:
From: Chia-Yu Chang chia-yu.chang@nokia-bell-labs.com
DualPI2 is the reference implementation of IETF RFC9332 DualQ Coupled AQM (https://datatracker.ietf.org/doc/html/rfc9332) providing two queues called low latency (L-queue) and classic (C-queue). By default, it enqueues non-ECN and ECT(0) packets into the C-queue and ECT(1) and CE packets into the low latency queue (L-queue), as per IETF RFC9332 spec.
This patch defines the dualpi2 Qdisc structure and parsing, and the following two patches include dumping and enqueue/dequeue for the DualPI2.
Signed-off-by: Chia-Yu Chang chia-yu.chang@nokia-bell-labs.com
...
diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c
...
+static int dualpi2_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
+{
- struct nlattr *tb[TCA_DUALPI2_MAX + 1];
- struct dualpi2_sched_data *q;
- int old_backlog;
- int old_qlen;
- int err;
- if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_DUALPI2_MAX, opt, dualpi2_policy,
extack);
- if (err < 0)
return err;
- q = qdisc_priv(sch);
- sch_tree_lock(sch);
- if (tb[TCA_DUALPI2_LIMIT]) {
u32 limit = nla_get_u32(tb[TCA_DUALPI2_LIMIT]);
WRITE_ONCE(sch->limit, limit);
WRITE_ONCE(q->memory_limit, get_memory_limit(sch, limit));
- }
- if (tb[TCA_DUALPI2_MEMORY_LIMIT])
WRITE_ONCE(q->memory_limit,
nla_get_u32(tb[TCA_DUALPI2_MEMORY_LIMIT]));
- if (tb[TCA_DUALPI2_TARGET]) {
u64 target = nla_get_u32(tb[TCA_DUALPI2_TARGET]);
WRITE_ONCE(q->pi2_target, target * NSEC_PER_USEC);
- }
- if (tb[TCA_DUALPI2_TUPDATE]) {
u64 tupdate = nla_get_u32(tb[TCA_DUALPI2_TUPDATE]);
WRITE_ONCE(q->pi2_tupdate, tupdate * NSEC_PER_USEC);
- }
- if (tb[TCA_DUALPI2_ALPHA]) {
u32 alpha = nla_get_u32(tb[TCA_DUALPI2_ALPHA]);
WRITE_ONCE(q->pi2_alpha, dualpi2_scale_alpha_beta(alpha));
- }
- if (tb[TCA_DUALPI2_BETA]) {
u32 beta = nla_get_u32(tb[TCA_DUALPI2_BETA]);
WRITE_ONCE(q->pi2_beta, dualpi2_scale_alpha_beta(beta));
- }
- if (tb[TCA_DUALPI2_STEP_THRESH]) {
u32 step_th = nla_get_u32(tb[TCA_DUALPI2_STEP_THRESH]);
bool step_pkt = nla_get_flag(tb[TCA_DUALPI2_STEP_PACKETS]);
WRITE_ONCE(q->step_in_packets, step_pkt);
WRITE_ONCE(q->step_thresh,
step_pkt ? step_th : (step_th * NSEC_PER_USEC));
- }
- if (tb[TCA_DUALPI2_MIN_QLEN_STEP])
WRITE_ONCE(q->min_qlen_step,
nla_get_u32(tb[TCA_DUALPI2_MIN_QLEN_STEP]));
- if (tb[TCA_DUALPI2_COUPLING]) {
u8 coupling = nla_get_u8(tb[TCA_DUALPI2_COUPLING]);
WRITE_ONCE(q->coupling_factor, coupling);
- }
- if (tb[TCA_DUALPI2_DROP_OVERLOAD]) {
u8 drop_overload = nla_get_u8(tb[TCA_DUALPI2_DROP_OVERLOAD]);
WRITE_ONCE(q->drop_overload, (bool)drop_overload);
- }
- if (tb[TCA_DUALPI2_DROP_EARLY]) {
u8 drop_early = nla_get_u8(tb[TCA_DUALPI2_DROP_EARLY]);
WRITE_ONCE(q->drop_early, (bool)drop_early);
- }
- if (tb[TCA_DUALPI2_C_PROTECTION]) {
u8 wc = nla_get_u8(tb[TCA_DUALPI2_C_PROTECTION]);
dualpi2_calculate_c_protection(sch, q, wc);
- }
- if (tb[TCA_DUALPI2_ECN_MASK]) {
u8 ecn_mask = nla_get_u8(tb[TCA_DUALPI2_ECN_MASK]);
if (ecn_mask != TCA_DUALPI2_ECN_MASK_L4S_ECT &&
ecn_mask != TCA_DUALPI2_ECN_MASK_ANY_ECT)
return -EINVAL;
sch_tree_lock() is leaked here.
Flagged by Sparse and Smatch.
WRITE_ONCE(q->ecn_mask, ecn_mask);
- }
- if (tb[TCA_DUALPI2_SPLIT_GSO]) {
u8 split_gso = nla_get_u8(tb[TCA_DUALPI2_SPLIT_GSO]);
WRITE_ONCE(q->split_gso, (bool)split_gso);
- }
- old_qlen = qdisc_qlen(sch);
- old_backlog = sch->qstats.backlog;
- while (qdisc_qlen(sch) > sch->limit ||
q->memory_used > q->memory_limit) {
struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
q->memory_used -= skb->truesize;
qdisc_qstats_backlog_dec(sch, skb);
rtnl_qdisc_drop(skb, sch);
- }
- qdisc_tree_reduce_backlog(sch, old_qlen - qdisc_qlen(sch),
old_backlog - sch->qstats.backlog);
- sch_tree_unlock(sch);
- return 0;
+}
...