Open vSwitch module accepts actions as a list from the netlink socket and then creates a copy which it uses in the action set processing. During processing of the action list on a packet, the module keeps a count of the execution depth and exits processing if the action depth goes too high.
However, during netlink processing the recursion depth isn't checked anywhere, and the copy trusts that kernel has large enough stack to accommodate it. The OVS sample action was the original action which could perform this kinds of recursion, and it originally checked that it didn't exceed the sample depth limit. However, when sample became optimized to provide the clone() semantics, the recursion limit was dropped.
This series adds a depth limit during the __ovs_nla_copy_actions() call that will ensure we don't exceed the max that the OVS userspace could generate for a clone().
Additionally, this series provides a selftest in 2/2 that can be used to determine if the OVS module is allowing unbounded access. It can be safely omitted where the ovs selftest framework isn't available.
Aaron Conole (2): net: openvswitch: limit the number of recursions from action sets selftests: openvswitch: Add validation for the recursion test
net/openvswitch/flow_netlink.c | 49 ++++++++----- .../selftests/net/openvswitch/openvswitch.sh | 13 ++++ .../selftests/net/openvswitch/ovs-dpctl.py | 71 +++++++++++++++---- 3 files changed, 102 insertions(+), 31 deletions(-)
The ovs module allows for some actions to recursively contain an action list for complex scenarios, such as sampling, checking lengths, etc. When these actions are copied into the internal flow table, they are evaluated to validate that such actions make sense, and these calls happen recursively.
The ovs-vswitchd userspace won't emit more than 16 recursion levels deep. However, the module has no such limit and will happily accept limits larger than 16 levels nested. Prevent this by tracking the number of recursions happening and manually limiting it to 16 levels nested.
The initial implementation of the sample action would track this depth and prevent more than 3 levels of recursion, but this was removed to support the clone use case, rather than limited at the current userspace limit.
Fixes: 798c166173ff ("openvswitch: Optimize sample action for the clone use cases") Signed-off-by: Aaron Conole aconole@redhat.com --- v1->v2: Switch to tracking the stack depth by using a depth argument rather than a per-cpu counter.
net/openvswitch/flow_netlink.c | 49 +++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 16 deletions(-)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 88965e2068ac..ebc5728aab4e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -48,6 +48,7 @@ struct ovs_len_tbl {
#define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +#define OVS_COPY_ACTIONS_MAX_DEPTH 16
static bool actions_may_change_flow(const struct nlattr *actions) { @@ -2545,13 +2546,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log); + u32 mpls_label_count, bool log, + u32 depth);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -2602,7 +2605,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return err;
err = __ovs_nla_copy_actions(net, actions, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1);
if (err) return err; @@ -2617,7 +2621,8 @@ static int validate_and_copy_dec_ttl(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1]; int start, action_start, err, rem; @@ -2660,7 +2665,8 @@ static int validate_and_copy_dec_ttl(struct net *net, return action_start;
err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, - vlan_tci, mpls_label_count, log); + vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err;
@@ -2674,7 +2680,8 @@ static int validate_and_copy_clone(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { int start, err; u32 exec; @@ -2694,7 +2701,8 @@ static int validate_and_copy_clone(struct net *net, return err;
err = __ovs_nla_copy_actions(net, attr, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err;
@@ -3063,7 +3071,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, u32 mpls_label_count, - bool log, bool last) + bool log, bool last, u32 depth) { const struct nlattr *acts_if_greater, *acts_if_lesser_eq; struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1]; @@ -3111,7 +3119,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1);
if (err) return err; @@ -3124,7 +3133,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1);
if (err) return err; @@ -3152,12 +3162,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err;
+ if (depth > OVS_COPY_ACTIONS_MAX_DEPTH) + return -EOVERFLOW; + nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -3355,7 +3369,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_sample(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3426,7 +3440,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_clone(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3440,7 +3454,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, + depth); if (err) return err; skip_copy = true; @@ -3450,7 +3465,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_DEC_TTL: err = validate_and_copy_dec_ttl(net, a, key, sfa, eth_type, vlan_tci, - mpls_label_count, log); + mpls_label_count, log, + depth); if (err) return err; skip_copy = true; @@ -3495,7 +3511,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
(*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, - key->eth.vlan.tci, mpls_label_count, log); + key->eth.vlan.tci, mpls_label_count, log, + 0); if (err) ovs_nla_free_flow_actions(*sfa);
On Wed, Feb 07, 2024 at 08:24:15AM -0500, Aaron Conole wrote:
The ovs module allows for some actions to recursively contain an action list for complex scenarios, such as sampling, checking lengths, etc. When these actions are copied into the internal flow table, they are evaluated to validate that such actions make sense, and these calls happen recursively.
The ovs-vswitchd userspace won't emit more than 16 recursion levels deep. However, the module has no such limit and will happily accept limits larger than 16 levels nested. Prevent this by tracking the number of recursions happening and manually limiting it to 16 levels nested.
The initial implementation of the sample action would track this depth and prevent more than 3 levels of recursion, but this was removed to support the clone use case, rather than limited at the current userspace limit.
Fixes: 798c166173ff ("openvswitch: Optimize sample action for the clone use cases") Signed-off-by: Aaron Conole aconole@redhat.com
v1->v2: Switch to tracking the stack depth by using a depth argument rather than a per-cpu counter.
Thanks for the update.
Reviewed-by: Simon Horman horms@kernel.org
Add a test case into the netlink checks that will show the number of nested action recursions won't exceed 16. Going to 17 on a small clone call isn't enough to exhaust the stack on (most) systems, so it should be safe to run even on systems that don't have the fix applied.
Signed-off-by: Aaron Conole aconole@redhat.com --- v1->v2: Used a slightly more portable io redirection semantic for the clone() flow test case.
.../selftests/net/openvswitch/openvswitch.sh | 13 ++++ .../selftests/net/openvswitch/ovs-dpctl.py | 71 +++++++++++++++---- 2 files changed, 69 insertions(+), 15 deletions(-)
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index f8499d4c87f3..36e40256ab92 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -502,7 +502,20 @@ test_netlink_checks () { wc -l) == 2 ] || \ return 1
+ info "Checking clone depth" ERR_MSG="Flow actions may not be safe on all matching packets" + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") + ovs_add_flow "test_netlink_checks" nv0 \ + 'in_port(1),eth(),eth_type(0x800),ipv4()' \ + 'clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(drop)))))))))))))))))' \ + >/dev/null 2>&1 && return 1 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") + + if [ "$PRE_TEST" == "$POST_TEST" ]; then + info "failed - clone depth too large" + return 1 + fi + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") ovs_add_flow "test_netlink_checks" nv0 \ 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \ diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index b97e621face9..5e0e539a323d 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -299,7 +299,7 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_PUSH_NSH", "none"), ("OVS_ACTION_ATTR_POP_NSH", "flag"), ("OVS_ACTION_ATTR_METER", "none"), - ("OVS_ACTION_ATTR_CLONE", "none"), + ("OVS_ACTION_ATTR_CLONE", "recursive"), ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"), ("OVS_ACTION_ATTR_ADD_MPLS", "none"), ("OVS_ACTION_ATTR_DEC_TTL", "none"), @@ -465,29 +465,42 @@ class ovsactions(nla): print_str += "pop_mpls" else: datum = self.get_attr(field[0]) - print_str += datum.dpstr(more) + if field[0] == "OVS_ACTION_ATTR_CLONE": + print_str += "clone(" + print_str += datum.dpstr(more) + print_str += ")" + else: + print_str += datum.dpstr(more)
return print_str
def parse(self, actstr): + totallen = len(actstr) while len(actstr) != 0: parsed = False + parencount = 0 if actstr.startswith("drop"): # If no reason is provided, the implicit drop is used (i.e no # action). If some reason is given, an explicit action is used. - actstr, reason = parse_extract_field( - actstr, - "drop(", - "([0-9]+)", - lambda x: int(x, 0), - False, - None, - ) + reason = None + if actstr.startswith("drop("): + parencount += 1 + + actstr, reason = parse_extract_field( + actstr, + "drop(", + "([0-9]+)", + lambda x: int(x, 0), + False, + None, + ) + if reason is not None: self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason]) parsed = True else: - return + actstr = actstr[len("drop"): ] + return (totallen - len(actstr))
elif parse_starts_block(actstr, "^(\d+)", False, True): actstr, output = parse_extract_field( @@ -504,6 +517,7 @@ class ovsactions(nla): False, 0, ) + parencount += 1 self["attrs"].append(["OVS_ACTION_ATTR_RECIRC", recircid]) parsed = True
@@ -516,12 +530,22 @@ class ovsactions(nla):
for flat_act in parse_flat_map: if parse_starts_block(actstr, flat_act[0], False): - actstr += len(flat_act[0]) + actstr = actstr[len(flat_act[0]):] self["attrs"].append([flat_act[1]]) actstr = actstr[strspn(actstr, ", ") :] parsed = True
- if parse_starts_block(actstr, "ct(", False): + if parse_starts_block(actstr, "clone(", False): + parencount += 1 + subacts = ovsactions() + actstr = actstr[len("clone("):] + parsedLen = subacts.parse(actstr) + lst = [] + self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts)) + actstr = actstr[parsedLen:] + parsed = True + elif parse_starts_block(actstr, "ct(", False): + parencount += 1 actstr = actstr[len("ct(") :] ctact = ovsactions.ctact()
@@ -553,6 +577,7 @@ class ovsactions(nla): natact = ovsactions.ctact.natattr()
if actstr.startswith("("): + parencount += 1 t = None actstr = actstr[1:] if actstr.startswith("src"): @@ -607,15 +632,29 @@ class ovsactions(nla): actstr = actstr[strspn(actstr, ", ") :]
ctact["attrs"].append(["OVS_CT_ATTR_NAT", natact]) - actstr = actstr[strspn(actstr, ",) ") :] + actstr = actstr[strspn(actstr, ", ") :]
self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact]) parsed = True
- actstr = actstr[strspn(actstr, "), ") :] + actstr = actstr[strspn(actstr, ", ") :] + while parencount > 0: + parencount -= 1 + actstr = actstr[strspn(actstr, " "):] + if len(actstr) and actstr[0] != ")": + raise ValueError("Action str: '%s' unbalanced" % actstr) + actstr = actstr[1:] + + if len(actstr) and actstr[0] == ")": + return (totallen - len(actstr)) + + actstr = actstr[strspn(actstr, ", ") :] + if not parsed: raise ValueError("Action str: '%s' not supported" % actstr)
+ return (totallen - len(actstr)) +
class ovskey(nla): nla_flags = NLA_F_NESTED @@ -2111,6 +2150,8 @@ def main(argv): ovsflow = OvsFlow() ndb = NDB()
+ sys.setrecursionlimit(100000) + if hasattr(args, "showdp"): found = False for iface in ndb.interfaces:
On Wed, Feb 07, 2024 at 08:24:16AM -0500, Aaron Conole wrote:
Add a test case into the netlink checks that will show the number of nested action recursions won't exceed 16. Going to 17 on a small clone call isn't enough to exhaust the stack on (most) systems, so it should be safe to run even on systems that don't have the fix applied.
Signed-off-by: Aaron Conole aconole@redhat.com
v1->v2: Used a slightly more portable io redirection semantic for the clone() flow test case.
Reviewed-by: Simon Horman horms@kernel.org
Hello:
This series was applied to netdev/net.git (main) by Jakub Kicinski kuba@kernel.org:
On Wed, 7 Feb 2024 08:24:14 -0500 you wrote:
Open vSwitch module accepts actions as a list from the netlink socket and then creates a copy which it uses in the action set processing. During processing of the action list on a packet, the module keeps a count of the execution depth and exits processing if the action depth goes too high.
However, during netlink processing the recursion depth isn't checked anywhere, and the copy trusts that kernel has large enough stack to accommodate it. The OVS sample action was the original action which could perform this kinds of recursion, and it originally checked that it didn't exceed the sample depth limit. However, when sample became optimized to provide the clone() semantics, the recursion limit was dropped.
[...]
Here is the summary with links: - [net,v2,1/2] net: openvswitch: limit the number of recursions from action sets https://git.kernel.org/netdev/net/c/6e2f90d31fe0 - [net,v2,2/2] selftests: openvswitch: Add validation for the recursion test https://git.kernel.org/netdev/net/c/bd128f62c365
You are awesome, thank you!
linux-kselftest-mirror@lists.linaro.org