When I implemented virtio's hash-related features to tun/tap [1], I found tun/tap does not fill the entire region reserved for the virtio header, leaving some uninitialized hole in the middle of the buffer after read()/recvmesg().
This series fills the uninitialized hole. More concretely, the num_buffers field will be initialized with 1, and the other fields will be inialized with 0. Setting the num_buffers field to 1 is mandated by virtio 1.0 [2].
The change to virtio header is preceded by another change that refactors tun and tap to unify their virtio-related code.
[1]: https://lore.kernel.org/r/20241008-rss-v5-0-f3cf68df005d@daynix.com [2]: https://lore.kernel.org/r/20241227084256-mutt-send-email-mst@kernel.org/
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- Changes in v4: - s/sz/vnet_hdr_len_sz/ for patch "tun: Decouple vnet handling" (Willem de Bruijn) - Reverted to add CONFIG_TUN_VNET. - Link to v3: https://lore.kernel.org/r/20250116-tun-v3-0-c6b2871e97f7@daynix.com
Changes in v3: - Dropped changes to fill the vnet header. - Splitted patch "tun: Unify vnet implementation". - Reverted spurious changes in patch "tun: Unify vnet implementation". - Merged tun_vnet.c into TAP. - Link to v2: https://lore.kernel.org/r/20250109-tun-v2-0-388d7d5a287a@daynix.com
Changes in v2: - Fixed num_buffers endian. - Link to v1: https://lore.kernel.org/r/20250108-tun-v1-0-67d784b34374@daynix.com
--- Akihiko Odaki (9): tun: Refactor CONFIG_TUN_VNET_CROSS_LE tun: Avoid double-tracking iov_iter length changes tun: Keep hdr_len in tun_get_user() tun: Decouple vnet from tun_struct tun: Decouple vnet handling tun: Extract the vnet handling code tap: Avoid double-tracking iov_iter length changes tap: Keep hdr_len in tap_get_user() tap: Use tun's vnet-related code
MAINTAINERS | 2 +- drivers/net/Kconfig | 5 ++ drivers/net/Makefile | 1 + drivers/net/tap.c | 172 ++++++------------------------------------ drivers/net/tun.c | 200 +++++++------------------------------------------ drivers/net/tun_vnet.c | 184 +++++++++++++++++++++++++++++++++++++++++++++ drivers/net/tun_vnet.h | 25 +++++++ 7 files changed, 267 insertions(+), 322 deletions(-) --- base-commit: a32e14f8aef69b42826cf0998b068a43d486a9e9 change-id: 20241230-tun-66e10a49b0c7
Best regards,
Check IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) to save some lines and make future changes easier.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com Reviewed-by: Willem de Bruijn willemb@google.com --- drivers/net/tun.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e816aaba8e5f2ed06f8832f79553b6c976e75bb8..452fc5104260fe7ff5fdd5cedc5d2647cbe35c79 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -298,10 +298,10 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; }
-#ifdef CONFIG_TUN_VNET_CROSS_LE static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) { - return tun->flags & TUN_VNET_BE ? false : + return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && + (tun->flags & TUN_VNET_BE)) && virtio_legacy_is_little_endian(); }
@@ -309,6 +309,9 @@ static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) { int be = !!(tun->flags & TUN_VNET_BE);
+ if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + if (put_user(be, argp)) return -EFAULT;
@@ -319,6 +322,9 @@ static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) { int be;
+ if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + if (get_user(be, argp)) return -EFAULT;
@@ -329,22 +335,6 @@ static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
return 0; } -#else -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) -{ - return virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} - -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */
static inline bool tun_is_little_endian(struct tun_struct *tun) {
tun_get_user() used to track the length of iov_iter with another variable. We can use iov_iter_count() to determine the current length to avoid such chores.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com Reviewed-by: Willem de Bruijn willemb@google.com --- drivers/net/tun.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 452fc5104260fe7ff5fdd5cedc5d2647cbe35c79..bd272b4736fb7e9004f7d91dc83c69af5239bfe0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1742,7 +1742,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; size_t total_len = iov_iter_count(from); - size_t len = total_len, align = tun->align, linear; + size_t len, align = tun->align, linear; struct virtio_net_hdr gso = { 0 }; int good_linear; int copylen; @@ -1754,9 +1754,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (!(tun->flags & IFF_NO_PI)) { - if (len < sizeof(pi)) + if (iov_iter_count(from) < sizeof(pi)) return -EINVAL; - len -= sizeof(pi);
if (!copy_from_iter_full(&pi, sizeof(pi), from)) return -EFAULT; @@ -1765,9 +1764,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
- if (len < vnet_hdr_sz) + if (iov_iter_count(from) < vnet_hdr_sz) return -EINVAL; - len -= vnet_hdr_sz;
if (!copy_from_iter_full(&gso, sizeof(gso), from)) return -EFAULT; @@ -1776,11 +1774,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2);
- if (tun16_to_cpu(tun, gso.hdr_len) > len) + if (tun16_to_cpu(tun, gso.hdr_len) > iov_iter_count(from)) return -EINVAL; iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); }
+ len = iov_iter_count(from); + if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { align += NET_IP_ALIGN; if (unlikely(len < ETH_HLEN ||
hdr_len is repeatedly used so keep it in a local variable.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com Reviewed-by: Willem de Bruijn willemb@google.com --- drivers/net/tun.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index bd272b4736fb7e9004f7d91dc83c69af5239bfe0..ec56ac86584813f990fabf4633e4d96ca81176ae 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1746,6 +1746,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, struct virtio_net_hdr gso = { 0 }; int good_linear; int copylen; + int hdr_len = 0; bool zerocopy = false; int err; u32 rxhash = 0; @@ -1776,6 +1777,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (tun16_to_cpu(tun, gso.hdr_len) > iov_iter_count(from)) return -EINVAL; + hdr_len = tun16_to_cpu(tun, gso.hdr_len); iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); }
@@ -1783,8 +1785,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { align += NET_IP_ALIGN; - if (unlikely(len < ETH_HLEN || - (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN))) + if (unlikely(len < ETH_HLEN || (hdr_len && hdr_len < ETH_HLEN))) return -EINVAL; }
@@ -1797,9 +1798,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ - copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; + copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear); linear = copylen; iov_iter_advance(&i, copylen); if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) @@ -1820,10 +1819,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } else { if (!zerocopy) { copylen = len; - if (tun16_to_cpu(tun, gso.hdr_len) > good_linear) - linear = good_linear; - else - linear = tun16_to_cpu(tun, gso.hdr_len); + linear = min(hdr_len, good_linear); }
if (frags) {
Decouple vnet-related functions from tun_struct so that we can reuse them for tap in the future.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tun.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ec56ac86584813f990fabf4633e4d96ca81176ae..add09dfdada5f76da87ae568072d121c2fc21caf 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -298,16 +298,16 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; }
-static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +static inline bool tun_legacy_is_little_endian(unsigned int flags) { return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && - (tun->flags & TUN_VNET_BE)) && + (flags & TUN_VNET_BE)) && virtio_legacy_is_little_endian(); }
-static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +static long tun_get_vnet_be(unsigned int flags, int __user *argp) { - int be = !!(tun->flags & TUN_VNET_BE); + int be = !!(flags & TUN_VNET_BE);
if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) return -EINVAL; @@ -318,7 +318,7 @@ static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) return 0; }
-static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +static long tun_set_vnet_be(unsigned int *flags, int __user *argp) { int be;
@@ -329,27 +329,26 @@ static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) return -EFAULT;
if (be) - tun->flags |= TUN_VNET_BE; + *flags |= TUN_VNET_BE; else - tun->flags &= ~TUN_VNET_BE; + *flags &= ~TUN_VNET_BE;
return 0; }
-static inline bool tun_is_little_endian(struct tun_struct *tun) +static inline bool tun_is_little_endian(unsigned int flags) { - return tun->flags & TUN_VNET_LE || - tun_legacy_is_little_endian(tun); + return flags & TUN_VNET_LE || tun_legacy_is_little_endian(flags); }
-static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) +static inline u16 tun16_to_cpu(unsigned int flags, __virtio16 val) { - return __virtio16_to_cpu(tun_is_little_endian(tun), val); + return __virtio16_to_cpu(tun_is_little_endian(flags), val); }
-static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) +static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) { - return __cpu_to_virtio16(tun_is_little_endian(tun), val); + return __cpu_to_virtio16(tun_is_little_endian(flags), val); }
static inline u32 tun_hashfn(u32 rxhash) @@ -1764,6 +1763,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); + int flags = tun->flags;
if (iov_iter_count(from) < vnet_hdr_sz) return -EINVAL; @@ -1772,12 +1772,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, return -EFAULT;
if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) - gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2); + tun16_to_cpu(flags, gso.csum_start) + tun16_to_cpu(flags, gso.csum_offset) + 2 > tun16_to_cpu(flags, gso.hdr_len)) + gso.hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, gso.csum_start) + tun16_to_cpu(flags, gso.csum_offset) + 2);
- if (tun16_to_cpu(tun, gso.hdr_len) > iov_iter_count(from)) + if (tun16_to_cpu(flags, gso.hdr_len) > iov_iter_count(from)) return -EINVAL; - hdr_len = tun16_to_cpu(tun, gso.hdr_len); + hdr_len = tun16_to_cpu(flags, gso.hdr_len); iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); }
@@ -1854,7 +1854,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } }
- if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) { + if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun->flags))) { atomic_long_inc(&tun->rx_frame_errors); err = -EINVAL; goto free_skb; @@ -2108,23 +2108,24 @@ static ssize_t tun_put_user(struct tun_struct *tun,
if (vnet_hdr_sz) { struct virtio_net_hdr gso; + int flags = tun->flags;
if (iov_iter_count(iter) < vnet_hdr_sz) return -EINVAL;
if (virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(tun), true, + tun_is_little_endian(flags), true, vlan_hlen)) { struct skb_shared_info *sinfo = skb_shinfo(skb);
if (net_ratelimit()) { netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), - tun16_to_cpu(tun, gso.hdr_len)); + sinfo->gso_type, tun16_to_cpu(flags, gso.gso_size), + tun16_to_cpu(flags, gso.hdr_len)); print_hex_dump(KERN_ERR, "tun: ", DUMP_PREFIX_NONE, 16, 1, skb->head, - min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); + min((int)tun16_to_cpu(flags, gso.hdr_len), 64), true); } WARN_ON_ONCE(1); return -EINVAL; @@ -2493,7 +2494,7 @@ static int tun_xdp_one(struct tun_struct *tun, skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data);
- if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) { + if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun->flags))) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); ret = -EINVAL; @@ -3322,11 +3323,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break;
case TUNGETVNETBE: - ret = tun_get_vnet_be(tun, argp); + ret = tun_get_vnet_be(tun->flags, argp); break;
case TUNSETVNETBE: - ret = tun_set_vnet_be(tun, argp); + ret = tun_set_vnet_be(&tun->flags, argp); break;
case TUNATTACHFILTER:
Akihiko Odaki wrote:
Decouple vnet-related functions from tun_struct so that we can reuse them for tap in the future.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
Reviewed-by: Willem de Bruijn willemb@google.com
Decouple the vnet handling code so that we can reuse it for tap.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tun.c | 229 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 133 insertions(+), 96 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index add09dfdada5f76da87ae568072d121c2fc21caf..20659a62bb51d2a497a9d3e9e3b3ee7e9fad4f35 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -351,6 +351,122 @@ static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) return __cpu_to_virtio16(tun_is_little_endian(flags), val); }
+static long tun_vnet_ioctl(int *vnet_hdr_len_sz, unsigned int *flags, + unsigned int cmd, int __user *sp) +{ + int s; + + switch (cmd) { + case TUNGETVNETHDRSZ: + s = *vnet_hdr_len_sz; + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETHDRSZ: + if (get_user(s, sp)) + return -EFAULT; + if (s < (int)sizeof(struct virtio_net_hdr)) + return -EINVAL; + + *vnet_hdr_len_sz = s; + return 0; + + case TUNGETVNETLE: + s = !!(*flags & TUN_VNET_LE); + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETLE: + if (get_user(s, sp)) + return -EFAULT; + if (s) + *flags |= TUN_VNET_LE; + else + *flags &= ~TUN_VNET_LE; + return 0; + + case TUNGETVNETBE: + return tun_get_vnet_be(*flags, sp); + + case TUNSETVNETBE: + return tun_set_vnet_be(flags, sp); + + default: + return -EINVAL; + } +} + +static int tun_vnet_hdr_get(int sz, unsigned int flags, struct iov_iter *from, + struct virtio_net_hdr *hdr) +{ + if (iov_iter_count(from) < sz) + return -EINVAL; + + if (!copy_from_iter_full(hdr, sizeof(*hdr), from)) + return -EFAULT; + + if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2 > tun16_to_cpu(flags, hdr->hdr_len)) + hdr->hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2); + + if (tun16_to_cpu(flags, hdr->hdr_len) > iov_iter_count(from)) + return -EINVAL; + + iov_iter_advance(from, sz - sizeof(*hdr)); + + return tun16_to_cpu(flags, hdr->hdr_len); +} + +static int tun_vnet_hdr_put(int sz, struct iov_iter *iter, + const struct virtio_net_hdr *hdr) +{ + if (unlikely(iov_iter_count(iter) < sz)) + return -EINVAL; + + if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) + return -EFAULT; + + iov_iter_advance(iter, sz - sizeof(*hdr)); + + return 0; +} + +static int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, + const struct virtio_net_hdr *hdr) +{ + return virtio_net_hdr_to_skb(skb, hdr, tun_is_little_endian(flags)); +} + +static int tun_vnet_hdr_from_skb(unsigned int flags, + const struct net_device *dev, + const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; + + if (virtio_net_hdr_from_skb(skb, hdr, + tun_is_little_endian(flags), true, + vlan_hlen)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + if (net_ratelimit()) { + netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, tun16_to_cpu(flags, hdr->gso_size), + tun16_to_cpu(flags, hdr->hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", + DUMP_PREFIX_NONE, + 16, 1, skb->head, + min(tun16_to_cpu(flags, hdr->hdr_len), 64), true); + } + WARN_ON_ONCE(1); + return -EINVAL; + } + + return 0; +} + static inline u32 tun_hashfn(u32 rxhash) { return rxhash & TUN_MASK_FLOW_ENTRIES; @@ -1763,22 +1879,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - int flags = tun->flags; - - if (iov_iter_count(from) < vnet_hdr_sz) - return -EINVAL; - - if (!copy_from_iter_full(&gso, sizeof(gso), from)) - return -EFAULT;
- if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tun16_to_cpu(flags, gso.csum_start) + tun16_to_cpu(flags, gso.csum_offset) + 2 > tun16_to_cpu(flags, gso.hdr_len)) - gso.hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, gso.csum_start) + tun16_to_cpu(flags, gso.csum_offset) + 2); - - if (tun16_to_cpu(flags, gso.hdr_len) > iov_iter_count(from)) - return -EINVAL; - hdr_len = tun16_to_cpu(flags, gso.hdr_len); - iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); + hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso); + if (hdr_len < 0) + return hdr_len; }
len = iov_iter_count(from); @@ -1854,7 +1958,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } }
- if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun->flags))) { + if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) { atomic_long_inc(&tun->rx_frame_errors); err = -EINVAL; goto free_skb; @@ -2049,18 +2153,15 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun, { int vnet_hdr_sz = 0; size_t size = xdp_frame->len; - size_t ret; + ssize_t ret;
if (tun->flags & IFF_VNET_HDR) { struct virtio_net_hdr gso = { 0 };
vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - if (unlikely(iov_iter_count(iter) < vnet_hdr_sz)) - return -EINVAL; - if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) != - sizeof(gso))) - return -EFAULT; - iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); + if (ret) + return ret; }
ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz; @@ -2083,6 +2184,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, int vlan_offset = 0; int vlan_hlen = 0; int vnet_hdr_sz = 0; + int ret;
if (skb_vlan_tag_present(skb)) vlan_hlen = VLAN_HLEN; @@ -2108,33 +2210,14 @@ static ssize_t tun_put_user(struct tun_struct *tun,
if (vnet_hdr_sz) { struct virtio_net_hdr gso; - int flags = tun->flags; - - if (iov_iter_count(iter) < vnet_hdr_sz) - return -EINVAL; - - if (virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(flags), true, - vlan_hlen)) { - struct skb_shared_info *sinfo = skb_shinfo(skb); - - if (net_ratelimit()) { - netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(flags, gso.gso_size), - tun16_to_cpu(flags, gso.hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min((int)tun16_to_cpu(flags, gso.hdr_len), 64), true); - } - WARN_ON_ONCE(1); - return -EINVAL; - }
- if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso)) - return -EFAULT; + ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso); + if (ret) + return ret;
- iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); + if (ret) + return ret; }
if (vlan_hlen) { @@ -2494,7 +2577,7 @@ static int tun_xdp_one(struct tun_struct *tun, skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data);
- if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun->flags))) { + if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); ret = -EINVAL; @@ -3078,8 +3161,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, kgid_t group; int ifindex; int sndbuf; - int vnet_hdr_sz; - int le; int ret; bool do_notify = false;
@@ -3286,50 +3367,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun_set_sndbuf(tun); break;
- case TUNGETVNETHDRSZ: - vnet_hdr_sz = tun->vnet_hdr_sz; - if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz))) - ret = -EFAULT; - break; - - case TUNSETVNETHDRSZ: - if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) { - ret = -EFAULT; - break; - } - if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) { - ret = -EINVAL; - break; - } - - tun->vnet_hdr_sz = vnet_hdr_sz; - break; - - case TUNGETVNETLE: - le = !!(tun->flags & TUN_VNET_LE); - if (put_user(le, (int __user *)argp)) - ret = -EFAULT; - break; - - case TUNSETVNETLE: - if (get_user(le, (int __user *)argp)) { - ret = -EFAULT; - break; - } - if (le) - tun->flags |= TUN_VNET_LE; - else - tun->flags &= ~TUN_VNET_LE; - break; - - case TUNGETVNETBE: - ret = tun_get_vnet_be(tun->flags, argp); - break; - - case TUNSETVNETBE: - ret = tun_set_vnet_be(&tun->flags, argp); - break; - case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; @@ -3385,7 +3422,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break;
default: - ret = -EINVAL; + ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp); break; }
Akihiko Odaki wrote:
Decouple the vnet handling code so that we can reuse it for tap.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
Reviewed-by: Willem de Bruijn willemb@google.com
The vnet handling code will be reused by tap.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- MAINTAINERS | 2 +- drivers/net/Kconfig | 4 ++ drivers/net/Makefile | 1 + drivers/net/tun.c | 174 +--------------------------------------------- drivers/net/tun_vnet.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/net/tun_vnet.h | 25 +++++++ 6 files changed, 217 insertions(+), 173 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS index 910305c11e8a882da5b49ce5bd55011b93f28c32..bc32b7e23c79ab80b19c8207f14c5e51a47ec89f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23902,7 +23902,7 @@ W: http://vtun.sourceforge.net/tun F: Documentation/networking/tuntap.rst F: arch/um/os-Linux/drivers/ F: drivers/net/tap.c -F: drivers/net/tun.c +F: drivers/net/tun*
TURBOCHANNEL SUBSYSTEM M: "Maciej W. Rozycki" macro@orcam.me.uk diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1fd5acdc73c6af0e1a861867039c3624fc618e25..924bf61f12a49566b26a78f42cea5ca1c48537c5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -391,10 +391,14 @@ config RIONET_RX_SIZE depends on RIONET default "128"
+config TUN_VNET + tristate + config TUN tristate "Universal TUN/TAP device driver support" depends on INET select CRC32 + select TUN_VNET help TUN/TAP provides packet reception and transmission for user space programs. It can be viewed as a simple Point-to-Point or Ethernet diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 13743d0e83b5fde479e9b30ad736be402d880dee..f6590f2795cf742ab15047d8f1b2d2d8661954a3 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,6 +29,7 @@ obj-y += mdio/ obj-y += pcs/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ +obj-$(CONFIG_TUN_VNET) += tun_vnet.o obj-$(CONFIG_TUN) += tun.o obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 20659a62bb51d2a497a9d3e9e3b3ee7e9fad4f35..21abd3613cacda175d4f469f580a2994b2f836e8 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -83,6 +83,8 @@ #include <linux/uaccess.h> #include <linux/proc_fs.h>
+#include "tun_vnet.h" + static void tun_default_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd);
@@ -94,9 +96,6 @@ static void tun_default_link_ksettings(struct net_device *dev, * overload it to mean fasync when stored there. */ #define TUN_FASYNC IFF_ATTACH_QUEUE -/* High bits in flags field are unused. */ -#define TUN_VNET_LE 0x80000000 -#define TUN_VNET_BE 0x40000000
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS) @@ -298,175 +297,6 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; }
-static inline bool tun_legacy_is_little_endian(unsigned int flags) -{ - return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && - (flags & TUN_VNET_BE)) && - virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(unsigned int flags, int __user *argp) -{ - int be = !!(flags & TUN_VNET_BE); - - if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) - return -EINVAL; - - if (put_user(be, argp)) - return -EFAULT; - - return 0; -} - -static long tun_set_vnet_be(unsigned int *flags, int __user *argp) -{ - int be; - - if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) - return -EINVAL; - - if (get_user(be, argp)) - return -EFAULT; - - if (be) - *flags |= TUN_VNET_BE; - else - *flags &= ~TUN_VNET_BE; - - return 0; -} - -static inline bool tun_is_little_endian(unsigned int flags) -{ - return flags & TUN_VNET_LE || tun_legacy_is_little_endian(flags); -} - -static inline u16 tun16_to_cpu(unsigned int flags, __virtio16 val) -{ - return __virtio16_to_cpu(tun_is_little_endian(flags), val); -} - -static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) -{ - return __cpu_to_virtio16(tun_is_little_endian(flags), val); -} - -static long tun_vnet_ioctl(int *vnet_hdr_len_sz, unsigned int *flags, - unsigned int cmd, int __user *sp) -{ - int s; - - switch (cmd) { - case TUNGETVNETHDRSZ: - s = *vnet_hdr_len_sz; - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETHDRSZ: - if (get_user(s, sp)) - return -EFAULT; - if (s < (int)sizeof(struct virtio_net_hdr)) - return -EINVAL; - - *vnet_hdr_len_sz = s; - return 0; - - case TUNGETVNETLE: - s = !!(*flags & TUN_VNET_LE); - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETLE: - if (get_user(s, sp)) - return -EFAULT; - if (s) - *flags |= TUN_VNET_LE; - else - *flags &= ~TUN_VNET_LE; - return 0; - - case TUNGETVNETBE: - return tun_get_vnet_be(*flags, sp); - - case TUNSETVNETBE: - return tun_set_vnet_be(flags, sp); - - default: - return -EINVAL; - } -} - -static int tun_vnet_hdr_get(int sz, unsigned int flags, struct iov_iter *from, - struct virtio_net_hdr *hdr) -{ - if (iov_iter_count(from) < sz) - return -EINVAL; - - if (!copy_from_iter_full(hdr, sizeof(*hdr), from)) - return -EFAULT; - - if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2 > tun16_to_cpu(flags, hdr->hdr_len)) - hdr->hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2); - - if (tun16_to_cpu(flags, hdr->hdr_len) > iov_iter_count(from)) - return -EINVAL; - - iov_iter_advance(from, sz - sizeof(*hdr)); - - return tun16_to_cpu(flags, hdr->hdr_len); -} - -static int tun_vnet_hdr_put(int sz, struct iov_iter *iter, - const struct virtio_net_hdr *hdr) -{ - if (unlikely(iov_iter_count(iter) < sz)) - return -EINVAL; - - if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) - return -EFAULT; - - iov_iter_advance(iter, sz - sizeof(*hdr)); - - return 0; -} - -static int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, - const struct virtio_net_hdr *hdr) -{ - return virtio_net_hdr_to_skb(skb, hdr, tun_is_little_endian(flags)); -} - -static int tun_vnet_hdr_from_skb(unsigned int flags, - const struct net_device *dev, - const struct sk_buff *skb, - struct virtio_net_hdr *hdr) -{ - int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; - - if (virtio_net_hdr_from_skb(skb, hdr, - tun_is_little_endian(flags), true, - vlan_hlen)) { - struct skb_shared_info *sinfo = skb_shinfo(skb); - - if (net_ratelimit()) { - netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(flags, hdr->gso_size), - tun16_to_cpu(flags, hdr->hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min(tun16_to_cpu(flags, hdr->hdr_len), 64), true); - } - WARN_ON_ONCE(1); - return -EINVAL; - } - - return 0; -} - static inline u32 tun_hashfn(u32 rxhash) { return rxhash & TUN_MASK_FLOW_ENTRIES; diff --git a/drivers/net/tun_vnet.c b/drivers/net/tun_vnet.c new file mode 100644 index 0000000000000000000000000000000000000000..5e5dd9c6aa38323914f714ba82d9848c29fba484 --- /dev/null +++ b/drivers/net/tun_vnet.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include "tun_vnet.h" + +/* High bits in flags field are unused. */ +#define TUN_VNET_LE 0x80000000 +#define TUN_VNET_BE 0x40000000 + +static inline bool tun_legacy_is_little_endian(unsigned int flags) +{ + return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && + (flags & TUN_VNET_BE)) && + virtio_legacy_is_little_endian(); +} + +static long tun_get_vnet_be(unsigned int flags, int __user *argp) +{ + int be = !!(flags & TUN_VNET_BE); + + if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + + if (put_user(be, argp)) + return -EFAULT; + + return 0; +} + +static long tun_set_vnet_be(unsigned int *flags, int __user *argp) +{ + int be; + + if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + + if (get_user(be, argp)) + return -EFAULT; + + if (be) + *flags |= TUN_VNET_BE; + else + *flags &= ~TUN_VNET_BE; + + return 0; +} + +static inline bool tun_is_little_endian(unsigned int flags) +{ + return flags & TUN_VNET_LE || tun_legacy_is_little_endian(flags); +} + +static inline u16 tun16_to_cpu(unsigned int flags, __virtio16 val) +{ + return __virtio16_to_cpu(tun_is_little_endian(flags), val); +} + +static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) +{ + return __cpu_to_virtio16(tun_is_little_endian(flags), val); +} + +long tun_vnet_ioctl(int *vnet_hdr_len_sz, unsigned int *flags, + unsigned int cmd, int __user *sp) +{ + int s; + + switch (cmd) { + case TUNGETVNETHDRSZ: + s = *vnet_hdr_len_sz; + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETHDRSZ: + if (get_user(s, sp)) + return -EFAULT; + if (s < (int)sizeof(struct virtio_net_hdr)) + return -EINVAL; + + *vnet_hdr_len_sz = s; + return 0; + + case TUNGETVNETLE: + s = !!(*flags & TUN_VNET_LE); + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETLE: + if (get_user(s, sp)) + return -EFAULT; + if (s) + *flags |= TUN_VNET_LE; + else + *flags &= ~TUN_VNET_LE; + return 0; + + case TUNGETVNETBE: + return tun_get_vnet_be(*flags, sp); + + case TUNSETVNETBE: + return tun_set_vnet_be(flags, sp); + + default: + return -EINVAL; + } +} +EXPORT_SYMBOL_GPL(tun_vnet_ioctl); + +int tun_vnet_hdr_get(int sz, unsigned int flags, struct iov_iter *from, + struct virtio_net_hdr *hdr) +{ + if (iov_iter_count(from) < sz) + return -EINVAL; + + if (!copy_from_iter_full(hdr, sizeof(*hdr), from)) + return -EFAULT; + + if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2 > tun16_to_cpu(flags, hdr->hdr_len)) + hdr->hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2); + + if (tun16_to_cpu(flags, hdr->hdr_len) > iov_iter_count(from)) + return -EINVAL; + + iov_iter_advance(from, sz - sizeof(*hdr)); + + return tun16_to_cpu(flags, hdr->hdr_len); +} +EXPORT_SYMBOL_GPL(tun_vnet_hdr_get); + +int tun_vnet_hdr_put(int sz, struct iov_iter *iter, + const struct virtio_net_hdr *hdr) +{ + if (unlikely(iov_iter_count(iter) < sz)) + return -EINVAL; + + if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) + return -EFAULT; + + iov_iter_advance(iter, sz - sizeof(*hdr)); + + return 0; +} +EXPORT_SYMBOL_GPL(tun_vnet_hdr_put); + +int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, + const struct virtio_net_hdr *hdr) +{ + return virtio_net_hdr_to_skb(skb, hdr, tun_is_little_endian(flags)); +} +EXPORT_SYMBOL_GPL(tun_vnet_hdr_to_skb); + +int tun_vnet_hdr_from_skb(unsigned int flags, + const struct net_device *dev, + const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; + + if (virtio_net_hdr_from_skb(skb, hdr, + tun_is_little_endian(flags), true, + vlan_hlen)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + if (net_ratelimit()) { + netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, tun16_to_cpu(flags, hdr->gso_size), + tun16_to_cpu(flags, hdr->hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", + DUMP_PREFIX_NONE, + 16, 1, skb->head, + min(tun16_to_cpu(flags, hdr->hdr_len), 64), true); + } + WARN_ON_ONCE(1); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(tun_vnet_hdr_from_skb); + +MODULE_DESCRIPTION("Common library for drivers implementing TUN/TAP's virtio-related features"); +MODULE_AUTHOR("Max Krasnyansky maxk@qualcomm.com"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h new file mode 100644 index 0000000000000000000000000000000000000000..a8d6e474933399d9c6161bef31e385f5c538da1c --- /dev/null +++ b/drivers/net/tun_vnet.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef TUN_VNET_H +#define TUN_VNET_H + +#include <linux/if_tun.h> +#include <linux/virtio_net.h> + +long tun_vnet_ioctl(int *sz, unsigned int *flags, + unsigned int cmd, int __user *sp); + +int tun_vnet_hdr_get(int sz, unsigned int flags, struct iov_iter *from, + struct virtio_net_hdr *hdr); + +int tun_vnet_hdr_put(int sz, struct iov_iter *iter, + const struct virtio_net_hdr *hdr); + +int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, + const struct virtio_net_hdr *hdr); + +int tun_vnet_hdr_from_skb(unsigned int flags, + const struct net_device *dev, + const struct sk_buff *skb, + struct virtio_net_hdr *hdr); + +#endif /* TUN_VNET_H */
tap_get_user() used to track the length of iov_iter with another variable. We can use iov_iter_count() to determine the current length to avoid such chores.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 5aa41d5f7765a6dcf185bccd3cba2299bad89398..061c2f27dfc83f5e6d0bea4da0e845cc429b1fd8 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -641,7 +641,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, struct sk_buff *skb; struct tap_dev *tap; unsigned long total_len = iov_iter_count(from); - unsigned long len = total_len; + unsigned long len; int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; @@ -655,9 +655,8 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
err = -EINVAL; - if (len < vnet_hdr_len) + if (iov_iter_count(from) < vnet_hdr_len) goto err; - len -= vnet_hdr_len;
err = -EFAULT; if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) @@ -671,10 +670,12 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, tap16_to_cpu(q, vnet_hdr.csum_start) + tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); err = -EINVAL; - if (tap16_to_cpu(q, vnet_hdr.hdr_len) > len) + if (tap16_to_cpu(q, vnet_hdr.hdr_len) > iov_iter_count(from)) goto err; }
+ len = iov_iter_count(from); + err = -EINVAL; if (unlikely(len < ETH_HLEN)) goto err;
Akihiko Odaki wrote:
tap_get_user() used to track the length of iov_iter with another variable. We can use iov_iter_count() to determine the current length to avoid such chores.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/tap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 5aa41d5f7765a6dcf185bccd3cba2299bad89398..061c2f27dfc83f5e6d0bea4da0e845cc429b1fd8 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -641,7 +641,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, struct sk_buff *skb; struct tap_dev *tap; unsigned long total_len = iov_iter_count(from);
- unsigned long len = total_len;
- unsigned long len; int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0;
@@ -655,9 +655,8 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); err = -EINVAL;
if (len < vnet_hdr_len)
if (iov_iter_count(from) < vnet_hdr_len) goto err;
len -= vnet_hdr_len;
err = -EFAULT; if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) @@ -671,10 +670,12 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, tap16_to_cpu(q, vnet_hdr.csum_start) + tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); err = -EINVAL;
if (tap16_to_cpu(q, vnet_hdr.hdr_len) > len)
}if (tap16_to_cpu(q, vnet_hdr.hdr_len) > iov_iter_count(from)) goto err;
- len = iov_iter_count(from);
Since len is still used in the rest of the function, might as well use it from the start. I'd drop this patch.
hdr_len is repeatedly used so keep it in a local variable.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tap.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 061c2f27dfc83f5e6d0bea4da0e845cc429b1fd8..7ee2e9ee2a89fd539b087496b92d2f6198266f44 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -645,6 +645,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; + int hdr_len = 0; int copylen = 0; int depth; bool zerocopy = false; @@ -672,6 +673,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, err = -EINVAL; if (tap16_to_cpu(q, vnet_hdr.hdr_len) > iov_iter_count(from)) goto err; + hdr_len = tap16_to_cpu(q, vnet_hdr.hdr_len); }
len = iov_iter_count(from); @@ -683,11 +685,8 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i;
- copylen = vnet_hdr.hdr_len ? - tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; - else if (copylen < ETH_HLEN) + copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear); + if (copylen < ETH_HLEN) copylen = ETH_HLEN; linear = copylen; i = *from; @@ -698,11 +697,9 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
if (!zerocopy) { copylen = len; - linear = tap16_to_cpu(q, vnet_hdr.hdr_len); - if (linear > good_linear) - linear = good_linear; - else if (linear < ETH_HLEN) - linear = ETH_HLEN; + linear = min(hdr_len, good_linear); + if (copylen < ETH_HLEN) + copylen = ETH_HLEN; }
skb = tap_alloc_skb(&q->sk, TAP_RESERVE, copylen,
Akihiko Odaki wrote:
hdr_len is repeatedly used so keep it in a local variable.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/tap.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 061c2f27dfc83f5e6d0bea4da0e845cc429b1fd8..7ee2e9ee2a89fd539b087496b92d2f6198266f44 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -645,6 +645,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0;
- int hdr_len = 0; int copylen = 0; int depth; bool zerocopy = false;
@@ -672,6 +673,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, err = -EINVAL; if (tap16_to_cpu(q, vnet_hdr.hdr_len) > iov_iter_count(from)) goto err;
}hdr_len = tap16_to_cpu(q, vnet_hdr.hdr_len);
len = iov_iter_count(from); @@ -683,11 +685,8 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i;
copylen = vnet_hdr.hdr_len ?
tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN;
if (copylen > good_linear)
copylen = good_linear;
else if (copylen < ETH_HLEN)
copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear);
linear = copylen; i = *from;if (copylen < ETH_HLEN) copylen = ETH_HLEN;
@@ -698,11 +697,9 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (!zerocopy) { copylen = len;
linear = tap16_to_cpu(q, vnet_hdr.hdr_len);
if (linear > good_linear)
linear = good_linear;
else if (linear < ETH_HLEN)
linear = ETH_HLEN;
linear = min(hdr_len, good_linear);
if (copylen < ETH_HLEN)
copylen = ETH_HLEN;
Similar to previous patch, I don't think this patch is significant enough to warrant the code churn.
On 2025/01/20 20:24, Willem de Bruijn wrote:
Akihiko Odaki wrote:
hdr_len is repeatedly used so keep it in a local variable.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/tap.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 061c2f27dfc83f5e6d0bea4da0e845cc429b1fd8..7ee2e9ee2a89fd539b087496b92d2f6198266f44 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -645,6 +645,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0;
- int hdr_len = 0; int copylen = 0; int depth; bool zerocopy = false;
@@ -672,6 +673,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, err = -EINVAL; if (tap16_to_cpu(q, vnet_hdr.hdr_len) > iov_iter_count(from)) goto err;
}hdr_len = tap16_to_cpu(q, vnet_hdr.hdr_len);
len = iov_iter_count(from); @@ -683,11 +685,8 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i;
copylen = vnet_hdr.hdr_len ?
tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN;
if (copylen > good_linear)
copylen = good_linear;
else if (copylen < ETH_HLEN)
copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear);
linear = copylen; i = *from;if (copylen < ETH_HLEN) copylen = ETH_HLEN;
@@ -698,11 +697,9 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (!zerocopy) { copylen = len;
linear = tap16_to_cpu(q, vnet_hdr.hdr_len);
if (linear > good_linear)
linear = good_linear;
else if (linear < ETH_HLEN)
linear = ETH_HLEN;
linear = min(hdr_len, good_linear);
if (copylen < ETH_HLEN)
copylen = ETH_HLEN;
Similar to previous patch, I don't think this patch is significant enough to warrant the code churn.
The following patch will require replacing tap16_to_cpu(q, vnet_hdr.hdr_len) with tap16_to_cpu(q->flags, vnet_hdr.hdr_len)
It will make some lines a bit too long. Calling tap16_to_cpu() at multiple places is also not good to keep the vnet implementation unified as the function inspects vnet_hdr.
This patch is independently too trivial, but I think it is a worthwhile cleanup combined with the following patch.
Akihiko Odaki wrote:
On 2025/01/20 20:24, Willem de Bruijn wrote:
Akihiko Odaki wrote:
hdr_len is repeatedly used so keep it in a local variable.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/tap.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 061c2f27dfc83f5e6d0bea4da0e845cc429b1fd8..7ee2e9ee2a89fd539b087496b92d2f6198266f44 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -645,6 +645,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0;
- int hdr_len = 0; int copylen = 0; int depth; bool zerocopy = false;
@@ -672,6 +673,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, err = -EINVAL; if (tap16_to_cpu(q, vnet_hdr.hdr_len) > iov_iter_count(from)) goto err;
}hdr_len = tap16_to_cpu(q, vnet_hdr.hdr_len);
len = iov_iter_count(from); @@ -683,11 +685,8 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i;
copylen = vnet_hdr.hdr_len ?
tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN;
if (copylen > good_linear)
copylen = good_linear;
else if (copylen < ETH_HLEN)
copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear);
linear = copylen; i = *from;if (copylen < ETH_HLEN) copylen = ETH_HLEN;
@@ -698,11 +697,9 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (!zerocopy) { copylen = len;
linear = tap16_to_cpu(q, vnet_hdr.hdr_len);
if (linear > good_linear)
linear = good_linear;
else if (linear < ETH_HLEN)
linear = ETH_HLEN;
linear = min(hdr_len, good_linear);
if (copylen < ETH_HLEN)
copylen = ETH_HLEN;
Similar to previous patch, I don't think this patch is significant enough to warrant the code churn.
The following patch will require replacing tap16_to_cpu(q, vnet_hdr.hdr_len) with tap16_to_cpu(q->flags, vnet_hdr.hdr_len)
It will make some lines a bit too long. Calling tap16_to_cpu() at multiple places is also not good to keep the vnet implementation unified as the function inspects vnet_hdr.
This patch is independently too trivial, but I think it is a worthwhile cleanup combined with the following patch.
Ok. I see what you mean.
tun and tap implements the same vnet-related features so reuse the code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/Kconfig | 1 + drivers/net/tap.c | 152 ++++++---------------------------------------------- 2 files changed, 16 insertions(+), 137 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 924bf61f12a49566b26a78f42cea5ca1c48537c5..f8aa35bf8a93ac1c7f76b85919ac110cb06f21fb 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -421,6 +421,7 @@ config TUN
config TAP tristate + select TUN_VNET help This option is selected by any driver implementing tap user space interface for a virtual interface to re-use core tap functionality. diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 7ee2e9ee2a89fd539b087496b92d2f6198266f44..4f3cc3b2e3c6fb387ee2aaeef54c3faf39d90f10 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -26,74 +26,9 @@ #include <linux/virtio_net.h> #include <linux/skb_array.h>
-#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) - -#define TAP_VNET_LE 0x80000000 -#define TAP_VNET_BE 0x40000000 - -#ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool tap_legacy_is_little_endian(struct tap_queue *q) -{ - return q->flags & TAP_VNET_BE ? false : - virtio_legacy_is_little_endian(); -} - -static long tap_get_vnet_be(struct tap_queue *q, int __user *sp) -{ - int s = !!(q->flags & TAP_VNET_BE); - - if (put_user(s, sp)) - return -EFAULT; - - return 0; -} - -static long tap_set_vnet_be(struct tap_queue *q, int __user *sp) -{ - int s; - - if (get_user(s, sp)) - return -EFAULT; - - if (s) - q->flags |= TAP_VNET_BE; - else - q->flags &= ~TAP_VNET_BE; - - return 0; -} -#else -static inline bool tap_legacy_is_little_endian(struct tap_queue *q) -{ - return virtio_legacy_is_little_endian(); -} - -static long tap_get_vnet_be(struct tap_queue *q, int __user *argp) -{ - return -EINVAL; -} - -static long tap_set_vnet_be(struct tap_queue *q, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */ - -static inline bool tap_is_little_endian(struct tap_queue *q) -{ - return q->flags & TAP_VNET_LE || - tap_legacy_is_little_endian(q); -} - -static inline u16 tap16_to_cpu(struct tap_queue *q, __virtio16 val) -{ - return __virtio16_to_cpu(tap_is_little_endian(q), val); -} +#include "tun_vnet.h"
-static inline __virtio16 cpu_to_tap16(struct tap_queue *q, u16 val) -{ - return __cpu_to_virtio16(tap_is_little_endian(q), val); -} +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
static struct proto tap_proto = { .name = "tap", @@ -655,25 +590,11 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
- err = -EINVAL; - if (iov_iter_count(from) < vnet_hdr_len) - goto err; - - err = -EFAULT; - if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) - goto err; - iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr)); - if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tap16_to_cpu(q, vnet_hdr.csum_start) + - tap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > - tap16_to_cpu(q, vnet_hdr.hdr_len)) - vnet_hdr.hdr_len = cpu_to_tap16(q, - tap16_to_cpu(q, vnet_hdr.csum_start) + - tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); - err = -EINVAL; - if (tap16_to_cpu(q, vnet_hdr.hdr_len) > iov_iter_count(from)) + hdr_len = tun_vnet_hdr_get(vnet_hdr_len, q->flags, from, &vnet_hdr); + if (hdr_len < 0) { + err = hdr_len; goto err; - hdr_len = tap16_to_cpu(q, vnet_hdr.hdr_len); + } }
len = iov_iter_count(from); @@ -731,8 +652,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, skb->dev = tap->dev;
if (vnet_hdr_len) { - err = virtio_net_hdr_to_skb(skb, &vnet_hdr, - tap_is_little_endian(q)); + err = tun_vnet_hdr_to_skb(q->flags, skb, &vnet_hdr); if (err) { rcu_read_unlock(); drop_reason = SKB_DROP_REASON_DEV_HDR; @@ -795,23 +715,17 @@ static ssize_t tap_put_user(struct tap_queue *q, int total;
if (q->flags & IFF_VNET_HDR) { - int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; struct virtio_net_hdr vnet_hdr;
vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); - if (iov_iter_count(iter) < vnet_hdr_len) - return -EINVAL;
- if (virtio_net_hdr_from_skb(skb, &vnet_hdr, - tap_is_little_endian(q), true, - vlan_hlen)) - BUG(); - - if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) != - sizeof(vnet_hdr)) - return -EFAULT; + ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr); + if (ret) + return ret;
- iov_iter_advance(iter, vnet_hdr_len - sizeof(vnet_hdr)); + ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr); + if (ret) + return ret; } total = vnet_hdr_len; total += skb->len; @@ -1070,42 +984,6 @@ static long tap_ioctl(struct file *file, unsigned int cmd, q->sk.sk_sndbuf = s; return 0;
- case TUNGETVNETHDRSZ: - s = q->vnet_hdr_sz; - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETHDRSZ: - if (get_user(s, sp)) - return -EFAULT; - if (s < (int)sizeof(struct virtio_net_hdr)) - return -EINVAL; - - q->vnet_hdr_sz = s; - return 0; - - case TUNGETVNETLE: - s = !!(q->flags & TAP_VNET_LE); - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETLE: - if (get_user(s, sp)) - return -EFAULT; - if (s) - q->flags |= TAP_VNET_LE; - else - q->flags &= ~TAP_VNET_LE; - return 0; - - case TUNGETVNETBE: - return tap_get_vnet_be(q, sp); - - case TUNSETVNETBE: - return tap_set_vnet_be(q, sp); - case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | @@ -1149,7 +1027,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, return ret;
default: - return -EINVAL; + return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp); } }
@@ -1196,7 +1074,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) skb->protocol = eth_hdr(skb)->h_proto;
if (vnet_hdr_len) { - err = virtio_net_hdr_to_skb(skb, gso, tap_is_little_endian(q)); + err = tun_vnet_hdr_to_skb(q->flags, skb, gso); if (err) goto err_kfree; }
Akihiko Odaki wrote:
When I implemented virtio's hash-related features to tun/tap [1], I found tun/tap does not fill the entire region reserved for the virtio header, leaving some uninitialized hole in the middle of the buffer after read()/recvmesg().
This series fills the uninitialized hole. More concretely, the num_buffers field will be initialized with 1, and the other fields will be inialized with 0. Setting the num_buffers field to 1 is mandated by virtio 1.0 [2].
The change to virtio header is preceded by another change that refactors tun and tap to unify their virtio-related code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
net-next is closed. But I'll review for when you resend.
Changes in v4:
- s/sz/vnet_hdr_len_sz/ for patch "tun: Decouple vnet handling" (Willem de Bruijn)
Minor, and sorry my bad: len_sz is duplicative, vnet_hdr_sz is common.
- Reverted to add CONFIG_TUN_VNET.
- Link to v3: https://lore.kernel.org/r/20250116-tun-v3-0-c6b2871e97f7@daynix.com
Changes in v3:
- Dropped changes to fill the vnet header.
- Splitted patch "tun: Unify vnet implementation".
- Reverted spurious changes in patch "tun: Unify vnet implementation".
- Merged tun_vnet.c into TAP.
- Link to v2: https://lore.kernel.org/r/20250109-tun-v2-0-388d7d5a287a@daynix.com
Changes in v2:
- Fixed num_buffers endian.
- Link to v1: https://lore.kernel.org/r/20250108-tun-v1-0-67d784b34374@daynix.com
Akihiko Odaki (9): tun: Refactor CONFIG_TUN_VNET_CROSS_LE tun: Avoid double-tracking iov_iter length changes tun: Keep hdr_len in tun_get_user() tun: Decouple vnet from tun_struct tun: Decouple vnet handling tun: Extract the vnet handling code tap: Avoid double-tracking iov_iter length changes tap: Keep hdr_len in tap_get_user() tap: Use tun's vnet-related code
MAINTAINERS | 2 +- drivers/net/Kconfig | 5 ++ drivers/net/Makefile | 1 + drivers/net/tap.c | 172 ++++++------------------------------------ drivers/net/tun.c | 200 +++++++------------------------------------------ drivers/net/tun_vnet.c | 184 +++++++++++++++++++++++++++++++++++++++++++++ drivers/net/tun_vnet.h | 25 +++++++ 7 files changed, 267 insertions(+), 322 deletions(-)
base-commit: a32e14f8aef69b42826cf0998b068a43d486a9e9 change-id: 20241230-tun-66e10a49b0c7
Best regards,
Akihiko Odaki akihiko.odaki@daynix.com
linux-kselftest-mirror@lists.linaro.org