When I implemented virtio's hash-related features to tun/tap [1], I found tun/tap does not fill the entire region reserved for the virtio header, leaving some uninitialized hole in the middle of the buffer after read()/recvmesg().
This series fills the uninitialized hole. More concretely, the num_buffers field will be initialized with 1, and the other fields will be inialized with 0. Setting the num_buffers field to 1 is mandated by virtio 1.0 [2].
The change to virtio header is preceded by another change that refactors tun and tap to unify their virtio-related code.
[1]: https://lore.kernel.org/r/20241008-rss-v5-0-f3cf68df005d@daynix.com [2]: https://lore.kernel.org/r/20241227084256-mutt-send-email-mst@kernel.org/
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- Changes in v3: - Dropped changes to fill the vnet header. - Splitted patch "tun: Unify vnet implementation". - Reverted spurious changes in patch "tun: Unify vnet implementation". - Merged tun_vnet.c into TAP. - Link to v2: https://lore.kernel.org/r/20250109-tun-v2-0-388d7d5a287a@daynix.com
Changes in v2: - Fixed num_buffers endian. - Link to v1: https://lore.kernel.org/r/20250108-tun-v1-0-67d784b34374@daynix.com
--- Akihiko Odaki (9): tun: Refactor CONFIG_TUN_VNET_CROSS_LE tun: Avoid double-tracking iov_iter length changes tun: Keep hdr_len in tun_get_user() tun: Decouple vnet from tun_struct tun: Decouple vnet handling tun: Extract the vnet handling code tap: Avoid double-tracking iov_iter length changes tap: Keep hdr_len in tap_get_user() tap: Use tun's vnet-related code
MAINTAINERS | 2 +- drivers/net/Kconfig | 1 + drivers/net/Makefile | 3 +- drivers/net/tap.c | 172 ++++++------------------------------------ drivers/net/tun.c | 200 +++++++------------------------------------------ drivers/net/tun_vnet.c | 180 ++++++++++++++++++++++++++++++++++++++++++++ drivers/net/tun_vnet.h | 25 +++++++ 7 files changed, 260 insertions(+), 323 deletions(-) --- base-commit: a32e14f8aef69b42826cf0998b068a43d486a9e9 change-id: 20241230-tun-66e10a49b0c7
Best regards,
Check IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) to save some lines and make future changes easier.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tun.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e816aaba8e5f..452fc5104260 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -298,10 +298,10 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; }
-#ifdef CONFIG_TUN_VNET_CROSS_LE static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) { - return tun->flags & TUN_VNET_BE ? false : + return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && + (tun->flags & TUN_VNET_BE)) && virtio_legacy_is_little_endian(); }
@@ -309,6 +309,9 @@ static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) { int be = !!(tun->flags & TUN_VNET_BE);
+ if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + if (put_user(be, argp)) return -EFAULT;
@@ -319,6 +322,9 @@ static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) { int be;
+ if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + if (get_user(be, argp)) return -EFAULT;
@@ -329,22 +335,6 @@ static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
return 0; } -#else -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) -{ - return virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} - -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */
static inline bool tun_is_little_endian(struct tun_struct *tun) {
Akihiko Odaki wrote:
Check IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) to save some lines and make future changes easier.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
Reviewed-by: Willem de Bruijn willemb@google.com
tun_get_user() used to track the length of iov_iter with another variable. We can use iov_iter_count() to determine the current length to avoid such chores.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tun.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 452fc5104260..bd272b4736fb 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1742,7 +1742,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; size_t total_len = iov_iter_count(from); - size_t len = total_len, align = tun->align, linear; + size_t len, align = tun->align, linear; struct virtio_net_hdr gso = { 0 }; int good_linear; int copylen; @@ -1754,9 +1754,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (!(tun->flags & IFF_NO_PI)) { - if (len < sizeof(pi)) + if (iov_iter_count(from) < sizeof(pi)) return -EINVAL; - len -= sizeof(pi);
if (!copy_from_iter_full(&pi, sizeof(pi), from)) return -EFAULT; @@ -1765,9 +1764,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
- if (len < vnet_hdr_sz) + if (iov_iter_count(from) < vnet_hdr_sz) return -EINVAL; - len -= vnet_hdr_sz;
if (!copy_from_iter_full(&gso, sizeof(gso), from)) return -EFAULT; @@ -1776,11 +1774,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2);
- if (tun16_to_cpu(tun, gso.hdr_len) > len) + if (tun16_to_cpu(tun, gso.hdr_len) > iov_iter_count(from)) return -EINVAL; iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); }
+ len = iov_iter_count(from); + if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { align += NET_IP_ALIGN; if (unlikely(len < ETH_HLEN ||
Akihiko Odaki wrote:
tun_get_user() used to track the length of iov_iter with another variable. We can use iov_iter_count() to determine the current length to avoid such chores.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
Reviewed-by: Willem de Bruijn willemb@google.com
hdr_len is repeatedly used so keep it in a local variable.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tun.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index bd272b4736fb..ec56ac865848 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1746,6 +1746,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, struct virtio_net_hdr gso = { 0 }; int good_linear; int copylen; + int hdr_len = 0; bool zerocopy = false; int err; u32 rxhash = 0; @@ -1776,6 +1777,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (tun16_to_cpu(tun, gso.hdr_len) > iov_iter_count(from)) return -EINVAL; + hdr_len = tun16_to_cpu(tun, gso.hdr_len); iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); }
@@ -1783,8 +1785,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { align += NET_IP_ALIGN; - if (unlikely(len < ETH_HLEN || - (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN))) + if (unlikely(len < ETH_HLEN || (hdr_len && hdr_len < ETH_HLEN))) return -EINVAL; }
@@ -1797,9 +1798,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ - copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; + copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear); linear = copylen; iov_iter_advance(&i, copylen); if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) @@ -1820,10 +1819,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } else { if (!zerocopy) { copylen = len; - if (tun16_to_cpu(tun, gso.hdr_len) > good_linear) - linear = good_linear; - else - linear = tun16_to_cpu(tun, gso.hdr_len); + linear = min(hdr_len, good_linear); }
if (frags) {
Akihiko Odaki wrote:
hdr_len is repeatedly used so keep it in a local variable.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
Reviewed-by: Willem de Bruijn willemb@google.com
Decouple vnet-related functions from tun_struct so that we can reuse them for tap in the future.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tun.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ec56ac865848..add09dfdada5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -298,16 +298,16 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; }
-static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +static inline bool tun_legacy_is_little_endian(unsigned int flags) { return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && - (tun->flags & TUN_VNET_BE)) && + (flags & TUN_VNET_BE)) && virtio_legacy_is_little_endian(); }
-static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +static long tun_get_vnet_be(unsigned int flags, int __user *argp) { - int be = !!(tun->flags & TUN_VNET_BE); + int be = !!(flags & TUN_VNET_BE);
if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) return -EINVAL; @@ -318,7 +318,7 @@ static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) return 0; }
-static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +static long tun_set_vnet_be(unsigned int *flags, int __user *argp) { int be;
@@ -329,27 +329,26 @@ static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) return -EFAULT;
if (be) - tun->flags |= TUN_VNET_BE; + *flags |= TUN_VNET_BE; else - tun->flags &= ~TUN_VNET_BE; + *flags &= ~TUN_VNET_BE;
return 0; }
-static inline bool tun_is_little_endian(struct tun_struct *tun) +static inline bool tun_is_little_endian(unsigned int flags) { - return tun->flags & TUN_VNET_LE || - tun_legacy_is_little_endian(tun); + return flags & TUN_VNET_LE || tun_legacy_is_little_endian(flags); }
-static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) +static inline u16 tun16_to_cpu(unsigned int flags, __virtio16 val) { - return __virtio16_to_cpu(tun_is_little_endian(tun), val); + return __virtio16_to_cpu(tun_is_little_endian(flags), val); }
-static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) +static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) { - return __cpu_to_virtio16(tun_is_little_endian(tun), val); + return __cpu_to_virtio16(tun_is_little_endian(flags), val); }
static inline u32 tun_hashfn(u32 rxhash) @@ -1764,6 +1763,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); + int flags = tun->flags;
if (iov_iter_count(from) < vnet_hdr_sz) return -EINVAL; @@ -1772,12 +1772,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, return -EFAULT;
if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) - gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2); + tun16_to_cpu(flags, gso.csum_start) + tun16_to_cpu(flags, gso.csum_offset) + 2 > tun16_to_cpu(flags, gso.hdr_len)) + gso.hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, gso.csum_start) + tun16_to_cpu(flags, gso.csum_offset) + 2);
- if (tun16_to_cpu(tun, gso.hdr_len) > iov_iter_count(from)) + if (tun16_to_cpu(flags, gso.hdr_len) > iov_iter_count(from)) return -EINVAL; - hdr_len = tun16_to_cpu(tun, gso.hdr_len); + hdr_len = tun16_to_cpu(flags, gso.hdr_len); iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); }
@@ -1854,7 +1854,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } }
- if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) { + if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun->flags))) { atomic_long_inc(&tun->rx_frame_errors); err = -EINVAL; goto free_skb; @@ -2108,23 +2108,24 @@ static ssize_t tun_put_user(struct tun_struct *tun,
if (vnet_hdr_sz) { struct virtio_net_hdr gso; + int flags = tun->flags;
if (iov_iter_count(iter) < vnet_hdr_sz) return -EINVAL;
if (virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(tun), true, + tun_is_little_endian(flags), true, vlan_hlen)) { struct skb_shared_info *sinfo = skb_shinfo(skb);
if (net_ratelimit()) { netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), - tun16_to_cpu(tun, gso.hdr_len)); + sinfo->gso_type, tun16_to_cpu(flags, gso.gso_size), + tun16_to_cpu(flags, gso.hdr_len)); print_hex_dump(KERN_ERR, "tun: ", DUMP_PREFIX_NONE, 16, 1, skb->head, - min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); + min((int)tun16_to_cpu(flags, gso.hdr_len), 64), true); } WARN_ON_ONCE(1); return -EINVAL; @@ -2493,7 +2494,7 @@ static int tun_xdp_one(struct tun_struct *tun, skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data);
- if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) { + if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun->flags))) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); ret = -EINVAL; @@ -3322,11 +3323,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break;
case TUNGETVNETBE: - ret = tun_get_vnet_be(tun, argp); + ret = tun_get_vnet_be(tun->flags, argp); break;
case TUNSETVNETBE: - ret = tun_set_vnet_be(tun, argp); + ret = tun_set_vnet_be(&tun->flags, argp); break;
case TUNATTACHFILTER:
Akihiko Odaki wrote:
Decouple vnet-related functions from tun_struct so that we can reuse them for tap in the future.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/tun.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ec56ac865848..add09dfdada5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -298,16 +298,16 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; } -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +static inline bool tun_legacy_is_little_endian(unsigned int flags) { return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) &&
(tun->flags & TUN_VNET_BE)) &&
virtio_legacy_is_little_endian();(flags & TUN_VNET_BE)) &&
} -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +static long tun_get_vnet_be(unsigned int flags, int __user *argp) {
- int be = !!(tun->flags & TUN_VNET_BE);
- int be = !!(flags & TUN_VNET_BE);
if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) return -EINVAL; @@ -318,7 +318,7 @@ static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) return 0; } -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +static long tun_set_vnet_be(unsigned int *flags, int __user *argp) { int be; @@ -329,27 +329,26 @@ static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) return -EFAULT; if (be)
tun->flags |= TUN_VNET_BE;
else*flags |= TUN_VNET_BE;
tun->flags &= ~TUN_VNET_BE;
*flags &= ~TUN_VNET_BE;
return 0; } -static inline bool tun_is_little_endian(struct tun_struct *tun) +static inline bool tun_is_little_endian(unsigned int flags) {
- return tun->flags & TUN_VNET_LE ||
tun_legacy_is_little_endian(tun);
- return flags & TUN_VNET_LE || tun_legacy_is_little_endian(flags);
} -static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) +static inline u16 tun16_to_cpu(unsigned int flags, __virtio16 val) {
- return __virtio16_to_cpu(tun_is_little_endian(tun), val);
- return __virtio16_to_cpu(tun_is_little_endian(flags), val);
} -static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) +static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) {
- return __cpu_to_virtio16(tun_is_little_endian(tun), val);
- return __cpu_to_virtio16(tun_is_little_endian(flags), val);
} static inline u32 tun_hashfn(u32 rxhash) @@ -1764,6 +1763,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
int flags = tun->flags;
Here and elsewhere: instead of passing around and repeatedly parsing flags, have a variable is_little_endian (or is_le)?
if (iov_iter_count(from) < vnet_hdr_sz) return -EINVAL; @@ -1772,12 +1772,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, return -EFAULT; if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len))
gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2);
tun16_to_cpu(flags, gso.csum_start) + tun16_to_cpu(flags, gso.csum_offset) + 2 > tun16_to_cpu(flags, gso.hdr_len))
gso.hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, gso.csum_start) + tun16_to_cpu(flags, gso.csum_offset) + 2);
if (tun16_to_cpu(tun, gso.hdr_len) > iov_iter_count(from))
if (tun16_to_cpu(flags, gso.hdr_len) > iov_iter_count(from)) return -EINVAL;
hdr_len = tun16_to_cpu(tun, gso.hdr_len);
iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); }hdr_len = tun16_to_cpu(flags, gso.hdr_len);
@@ -1854,7 +1854,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } }
- if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
- if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun->flags))) { atomic_long_inc(&tun->rx_frame_errors); err = -EINVAL; goto free_skb;
@@ -2108,23 +2108,24 @@ static ssize_t tun_put_user(struct tun_struct *tun, if (vnet_hdr_sz) { struct virtio_net_hdr gso;
int flags = tun->flags;
if (iov_iter_count(iter) < vnet_hdr_sz) return -EINVAL; if (virtio_net_hdr_from_skb(skb, &gso,
tun_is_little_endian(tun), true,
tun_is_little_endian(flags), true, vlan_hlen)) { struct skb_shared_info *sinfo = skb_shinfo(skb);
if (net_ratelimit()) { netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
tun16_to_cpu(tun, gso.hdr_len));
sinfo->gso_type, tun16_to_cpu(flags, gso.gso_size),
tun16_to_cpu(flags, gso.hdr_len)); print_hex_dump(KERN_ERR, "tun: ", DUMP_PREFIX_NONE, 16, 1, skb->head,
min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
min((int)tun16_to_cpu(flags, gso.hdr_len), 64), true); } WARN_ON_ONCE(1); return -EINVAL;
@@ -2493,7 +2494,7 @@ static int tun_xdp_one(struct tun_struct *tun, skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data);
- if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
- if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun->flags))) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); ret = -EINVAL;
@@ -3322,11 +3323,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; case TUNGETVNETBE:
ret = tun_get_vnet_be(tun, argp);
break;ret = tun_get_vnet_be(tun->flags, argp);
case TUNSETVNETBE:
ret = tun_set_vnet_be(tun, argp);
break;ret = tun_set_vnet_be(&tun->flags, argp);
case TUNATTACHFILTER:
-- 2.47.1
Willem de Bruijn wrote:
Akihiko Odaki wrote:
Decouple vnet-related functions from tun_struct so that we can reuse them for tap in the future.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/tun.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ec56ac865848..add09dfdada5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -298,16 +298,16 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; } -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) +static inline bool tun_legacy_is_little_endian(unsigned int flags) { return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) &&
(tun->flags & TUN_VNET_BE)) &&
virtio_legacy_is_little_endian();(flags & TUN_VNET_BE)) &&
} -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) +static long tun_get_vnet_be(unsigned int flags, int __user *argp) {
- int be = !!(tun->flags & TUN_VNET_BE);
- int be = !!(flags & TUN_VNET_BE);
if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) return -EINVAL; @@ -318,7 +318,7 @@ static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) return 0; } -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) +static long tun_set_vnet_be(unsigned int *flags, int __user *argp) { int be; @@ -329,27 +329,26 @@ static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) return -EFAULT; if (be)
tun->flags |= TUN_VNET_BE;
else*flags |= TUN_VNET_BE;
tun->flags &= ~TUN_VNET_BE;
*flags &= ~TUN_VNET_BE;
return 0; } -static inline bool tun_is_little_endian(struct tun_struct *tun) +static inline bool tun_is_little_endian(unsigned int flags) {
- return tun->flags & TUN_VNET_LE ||
tun_legacy_is_little_endian(tun);
- return flags & TUN_VNET_LE || tun_legacy_is_little_endian(flags);
} -static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) +static inline u16 tun16_to_cpu(unsigned int flags, __virtio16 val) {
- return __virtio16_to_cpu(tun_is_little_endian(tun), val);
- return __virtio16_to_cpu(tun_is_little_endian(flags), val);
} -static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) +static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) {
- return __cpu_to_virtio16(tun_is_little_endian(tun), val);
- return __cpu_to_virtio16(tun_is_little_endian(flags), val);
} static inline u32 tun_hashfn(u32 rxhash) @@ -1764,6 +1763,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
int flags = tun->flags;
Here and elsewhere: instead of passing around and repeatedly parsing flags, have a variable is_little_endian (or is_le)?
I guess this will not work everywhere, because endianness is not a boolean flag.. Code checks both TUN_VNET_LE and TUN_VNET_BE.
Decouple the vnet handling code so that we can reuse it for tap.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tun.c | 229 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 133 insertions(+), 96 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index add09dfdada5..1f4a066ad2f0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -351,6 +351,122 @@ static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) return __cpu_to_virtio16(tun_is_little_endian(flags), val); }
+static long tun_vnet_ioctl(int *sz, unsigned int *flags, + unsigned int cmd, int __user *sp) +{ + int s; + + switch (cmd) { + case TUNGETVNETHDRSZ: + s = *sz; + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETHDRSZ: + if (get_user(s, sp)) + return -EFAULT; + if (s < (int)sizeof(struct virtio_net_hdr)) + return -EINVAL; + + *sz = s; + return 0; + + case TUNGETVNETLE: + s = !!(*flags & TUN_VNET_LE); + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETLE: + if (get_user(s, sp)) + return -EFAULT; + if (s) + *flags |= TUN_VNET_LE; + else + *flags &= ~TUN_VNET_LE; + return 0; + + case TUNGETVNETBE: + return tun_get_vnet_be(*flags, sp); + + case TUNSETVNETBE: + return tun_set_vnet_be(flags, sp); + + default: + return -EINVAL; + } +} + +static int tun_vnet_hdr_get(int sz, unsigned int flags, struct iov_iter *from, + struct virtio_net_hdr *hdr) +{ + if (iov_iter_count(from) < sz) + return -EINVAL; + + if (!copy_from_iter_full(hdr, sizeof(*hdr), from)) + return -EFAULT; + + if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2 > tun16_to_cpu(flags, hdr->hdr_len)) + hdr->hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2); + + if (tun16_to_cpu(flags, hdr->hdr_len) > iov_iter_count(from)) + return -EINVAL; + + iov_iter_advance(from, sz - sizeof(*hdr)); + + return tun16_to_cpu(flags, hdr->hdr_len); +} + +static int tun_vnet_hdr_put(int sz, struct iov_iter *iter, + const struct virtio_net_hdr *hdr) +{ + if (unlikely(iov_iter_count(iter) < sz)) + return -EINVAL; + + if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) + return -EFAULT; + + iov_iter_advance(iter, sz - sizeof(*hdr)); + + return 0; +} + +static int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, + const struct virtio_net_hdr *hdr) +{ + return virtio_net_hdr_to_skb(skb, hdr, tun_is_little_endian(flags)); +} + +static int tun_vnet_hdr_from_skb(unsigned int flags, + const struct net_device *dev, + const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; + + if (virtio_net_hdr_from_skb(skb, hdr, + tun_is_little_endian(flags), true, + vlan_hlen)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + if (net_ratelimit()) { + netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, tun16_to_cpu(flags, hdr->gso_size), + tun16_to_cpu(flags, hdr->hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", + DUMP_PREFIX_NONE, + 16, 1, skb->head, + min(tun16_to_cpu(flags, hdr->hdr_len), 64), true); + } + WARN_ON_ONCE(1); + return -EINVAL; + } + + return 0; +} + static inline u32 tun_hashfn(u32 rxhash) { return rxhash & TUN_MASK_FLOW_ENTRIES; @@ -1763,22 +1879,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - int flags = tun->flags; - - if (iov_iter_count(from) < vnet_hdr_sz) - return -EINVAL; - - if (!copy_from_iter_full(&gso, sizeof(gso), from)) - return -EFAULT;
- if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tun16_to_cpu(flags, gso.csum_start) + tun16_to_cpu(flags, gso.csum_offset) + 2 > tun16_to_cpu(flags, gso.hdr_len)) - gso.hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, gso.csum_start) + tun16_to_cpu(flags, gso.csum_offset) + 2); - - if (tun16_to_cpu(flags, gso.hdr_len) > iov_iter_count(from)) - return -EINVAL; - hdr_len = tun16_to_cpu(flags, gso.hdr_len); - iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); + hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso); + if (hdr_len < 0) + return hdr_len; }
len = iov_iter_count(from); @@ -1854,7 +1958,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } }
- if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun->flags))) { + if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) { atomic_long_inc(&tun->rx_frame_errors); err = -EINVAL; goto free_skb; @@ -2049,18 +2153,15 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun, { int vnet_hdr_sz = 0; size_t size = xdp_frame->len; - size_t ret; + ssize_t ret;
if (tun->flags & IFF_VNET_HDR) { struct virtio_net_hdr gso = { 0 };
vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - if (unlikely(iov_iter_count(iter) < vnet_hdr_sz)) - return -EINVAL; - if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) != - sizeof(gso))) - return -EFAULT; - iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); + if (ret) + return ret; }
ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz; @@ -2083,6 +2184,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, int vlan_offset = 0; int vlan_hlen = 0; int vnet_hdr_sz = 0; + int ret;
if (skb_vlan_tag_present(skb)) vlan_hlen = VLAN_HLEN; @@ -2108,33 +2210,14 @@ static ssize_t tun_put_user(struct tun_struct *tun,
if (vnet_hdr_sz) { struct virtio_net_hdr gso; - int flags = tun->flags; - - if (iov_iter_count(iter) < vnet_hdr_sz) - return -EINVAL; - - if (virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(flags), true, - vlan_hlen)) { - struct skb_shared_info *sinfo = skb_shinfo(skb); - - if (net_ratelimit()) { - netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(flags, gso.gso_size), - tun16_to_cpu(flags, gso.hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min((int)tun16_to_cpu(flags, gso.hdr_len), 64), true); - } - WARN_ON_ONCE(1); - return -EINVAL; - }
- if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso)) - return -EFAULT; + ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso); + if (ret) + return ret;
- iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); + if (ret) + return ret; }
if (vlan_hlen) { @@ -2494,7 +2577,7 @@ static int tun_xdp_one(struct tun_struct *tun, skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data);
- if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun->flags))) { + if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); ret = -EINVAL; @@ -3078,8 +3161,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, kgid_t group; int ifindex; int sndbuf; - int vnet_hdr_sz; - int le; int ret; bool do_notify = false;
@@ -3286,50 +3367,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun_set_sndbuf(tun); break;
- case TUNGETVNETHDRSZ: - vnet_hdr_sz = tun->vnet_hdr_sz; - if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz))) - ret = -EFAULT; - break; - - case TUNSETVNETHDRSZ: - if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) { - ret = -EFAULT; - break; - } - if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) { - ret = -EINVAL; - break; - } - - tun->vnet_hdr_sz = vnet_hdr_sz; - break; - - case TUNGETVNETLE: - le = !!(tun->flags & TUN_VNET_LE); - if (put_user(le, (int __user *)argp)) - ret = -EFAULT; - break; - - case TUNSETVNETLE: - if (get_user(le, (int __user *)argp)) { - ret = -EFAULT; - break; - } - if (le) - tun->flags |= TUN_VNET_LE; - else - tun->flags &= ~TUN_VNET_LE; - break; - - case TUNGETVNETBE: - ret = tun_get_vnet_be(tun->flags, argp); - break; - - case TUNSETVNETBE: - ret = tun_set_vnet_be(&tun->flags, argp); - break; - case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; @@ -3385,7 +3422,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break;
default: - ret = -EINVAL; + ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp); break; }
Akihiko Odaki wrote:
Decouple the vnet handling code so that we can reuse it for tap.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/tun.c | 229 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 133 insertions(+), 96 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index add09dfdada5..1f4a066ad2f0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -351,6 +351,122 @@ static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) return __cpu_to_virtio16(tun_is_little_endian(flags), val); } +static long tun_vnet_ioctl(int *sz, unsigned int *flags,
unsigned int cmd, int __user *sp)
+{
please use vnet_hdr_len_sz instead of sz. It's a bit too cryptic for a casual reader to understand the meaning.
The vnet handling code will be reused by tap.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- MAINTAINERS | 2 +- drivers/net/Makefile | 3 +- drivers/net/tun.c | 174 +----------------------------------------------- drivers/net/tun_vnet.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/net/tun_vnet.h | 25 +++++++ 5 files changed, 205 insertions(+), 174 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS index 910305c11e8a..bc32b7e23c79 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23902,7 +23902,7 @@ W: http://vtun.sourceforge.net/tun F: Documentation/networking/tuntap.rst F: arch/um/os-Linux/drivers/ F: drivers/net/tap.c -F: drivers/net/tun.c +F: drivers/net/tun*
TURBOCHANNEL SUBSYSTEM M: "Maciej W. Rozycki" macro@orcam.me.uk diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 13743d0e83b5..bb8eb3053772 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,7 +29,8 @@ obj-y += mdio/ obj-y += pcs/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ -obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TUN) += tun-drv.o +tun-drv-y := tun.o tun_vnet.o obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 1f4a066ad2f0..21abd3613cac 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -83,6 +83,8 @@ #include <linux/uaccess.h> #include <linux/proc_fs.h>
+#include "tun_vnet.h" + static void tun_default_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd);
@@ -94,9 +96,6 @@ static void tun_default_link_ksettings(struct net_device *dev, * overload it to mean fasync when stored there. */ #define TUN_FASYNC IFF_ATTACH_QUEUE -/* High bits in flags field are unused. */ -#define TUN_VNET_LE 0x80000000 -#define TUN_VNET_BE 0x40000000
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS) @@ -298,175 +297,6 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; }
-static inline bool tun_legacy_is_little_endian(unsigned int flags) -{ - return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && - (flags & TUN_VNET_BE)) && - virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(unsigned int flags, int __user *argp) -{ - int be = !!(flags & TUN_VNET_BE); - - if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) - return -EINVAL; - - if (put_user(be, argp)) - return -EFAULT; - - return 0; -} - -static long tun_set_vnet_be(unsigned int *flags, int __user *argp) -{ - int be; - - if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) - return -EINVAL; - - if (get_user(be, argp)) - return -EFAULT; - - if (be) - *flags |= TUN_VNET_BE; - else - *flags &= ~TUN_VNET_BE; - - return 0; -} - -static inline bool tun_is_little_endian(unsigned int flags) -{ - return flags & TUN_VNET_LE || tun_legacy_is_little_endian(flags); -} - -static inline u16 tun16_to_cpu(unsigned int flags, __virtio16 val) -{ - return __virtio16_to_cpu(tun_is_little_endian(flags), val); -} - -static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) -{ - return __cpu_to_virtio16(tun_is_little_endian(flags), val); -} - -static long tun_vnet_ioctl(int *sz, unsigned int *flags, - unsigned int cmd, int __user *sp) -{ - int s; - - switch (cmd) { - case TUNGETVNETHDRSZ: - s = *sz; - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETHDRSZ: - if (get_user(s, sp)) - return -EFAULT; - if (s < (int)sizeof(struct virtio_net_hdr)) - return -EINVAL; - - *sz = s; - return 0; - - case TUNGETVNETLE: - s = !!(*flags & TUN_VNET_LE); - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETLE: - if (get_user(s, sp)) - return -EFAULT; - if (s) - *flags |= TUN_VNET_LE; - else - *flags &= ~TUN_VNET_LE; - return 0; - - case TUNGETVNETBE: - return tun_get_vnet_be(*flags, sp); - - case TUNSETVNETBE: - return tun_set_vnet_be(flags, sp); - - default: - return -EINVAL; - } -} - -static int tun_vnet_hdr_get(int sz, unsigned int flags, struct iov_iter *from, - struct virtio_net_hdr *hdr) -{ - if (iov_iter_count(from) < sz) - return -EINVAL; - - if (!copy_from_iter_full(hdr, sizeof(*hdr), from)) - return -EFAULT; - - if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2 > tun16_to_cpu(flags, hdr->hdr_len)) - hdr->hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2); - - if (tun16_to_cpu(flags, hdr->hdr_len) > iov_iter_count(from)) - return -EINVAL; - - iov_iter_advance(from, sz - sizeof(*hdr)); - - return tun16_to_cpu(flags, hdr->hdr_len); -} - -static int tun_vnet_hdr_put(int sz, struct iov_iter *iter, - const struct virtio_net_hdr *hdr) -{ - if (unlikely(iov_iter_count(iter) < sz)) - return -EINVAL; - - if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) - return -EFAULT; - - iov_iter_advance(iter, sz - sizeof(*hdr)); - - return 0; -} - -static int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, - const struct virtio_net_hdr *hdr) -{ - return virtio_net_hdr_to_skb(skb, hdr, tun_is_little_endian(flags)); -} - -static int tun_vnet_hdr_from_skb(unsigned int flags, - const struct net_device *dev, - const struct sk_buff *skb, - struct virtio_net_hdr *hdr) -{ - int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; - - if (virtio_net_hdr_from_skb(skb, hdr, - tun_is_little_endian(flags), true, - vlan_hlen)) { - struct skb_shared_info *sinfo = skb_shinfo(skb); - - if (net_ratelimit()) { - netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(flags, hdr->gso_size), - tun16_to_cpu(flags, hdr->hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min(tun16_to_cpu(flags, hdr->hdr_len), 64), true); - } - WARN_ON_ONCE(1); - return -EINVAL; - } - - return 0; -} - static inline u32 tun_hashfn(u32 rxhash) { return rxhash & TUN_MASK_FLOW_ENTRIES; diff --git a/drivers/net/tun_vnet.c b/drivers/net/tun_vnet.c new file mode 100644 index 000000000000..5a6cbfb6eed0 --- /dev/null +++ b/drivers/net/tun_vnet.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include "tun_vnet.h" + +/* High bits in flags field are unused. */ +#define TUN_VNET_LE 0x80000000 +#define TUN_VNET_BE 0x40000000 + +static inline bool tun_legacy_is_little_endian(unsigned int flags) +{ + return !(IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && + (flags & TUN_VNET_BE)) && + virtio_legacy_is_little_endian(); +} + +static long tun_get_vnet_be(unsigned int flags, int __user *argp) +{ + int be = !!(flags & TUN_VNET_BE); + + if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + + if (put_user(be, argp)) + return -EFAULT; + + return 0; +} + +static long tun_set_vnet_be(unsigned int *flags, int __user *argp) +{ + int be; + + if (!IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE)) + return -EINVAL; + + if (get_user(be, argp)) + return -EFAULT; + + if (be) + *flags |= TUN_VNET_BE; + else + *flags &= ~TUN_VNET_BE; + + return 0; +} + +static inline bool tun_is_little_endian(unsigned int flags) +{ + return flags & TUN_VNET_LE || tun_legacy_is_little_endian(flags); +} + +static inline u16 tun16_to_cpu(unsigned int flags, __virtio16 val) +{ + return __virtio16_to_cpu(tun_is_little_endian(flags), val); +} + +static inline __virtio16 cpu_to_tun16(unsigned int flags, u16 val) +{ + return __cpu_to_virtio16(tun_is_little_endian(flags), val); +} + +long tun_vnet_ioctl(int *sz, unsigned int *flags, + unsigned int cmd, int __user *sp) +{ + int s; + + switch (cmd) { + case TUNGETVNETHDRSZ: + s = *sz; + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETHDRSZ: + if (get_user(s, sp)) + return -EFAULT; + if (s < (int)sizeof(struct virtio_net_hdr)) + return -EINVAL; + + *sz = s; + return 0; + + case TUNGETVNETLE: + s = !!(*flags & TUN_VNET_LE); + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETLE: + if (get_user(s, sp)) + return -EFAULT; + if (s) + *flags |= TUN_VNET_LE; + else + *flags &= ~TUN_VNET_LE; + return 0; + + case TUNGETVNETBE: + return tun_get_vnet_be(*flags, sp); + + case TUNSETVNETBE: + return tun_set_vnet_be(flags, sp); + + default: + return -EINVAL; + } +} + +int tun_vnet_hdr_get(int sz, unsigned int flags, struct iov_iter *from, + struct virtio_net_hdr *hdr) +{ + if (iov_iter_count(from) < sz) + return -EINVAL; + + if (!copy_from_iter_full(hdr, sizeof(*hdr), from)) + return -EFAULT; + + if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2 > tun16_to_cpu(flags, hdr->hdr_len)) + hdr->hdr_len = cpu_to_tun16(flags, tun16_to_cpu(flags, hdr->csum_start) + tun16_to_cpu(flags, hdr->csum_offset) + 2); + + if (tun16_to_cpu(flags, hdr->hdr_len) > iov_iter_count(from)) + return -EINVAL; + + iov_iter_advance(from, sz - sizeof(*hdr)); + + return tun16_to_cpu(flags, hdr->hdr_len); +} + +int tun_vnet_hdr_put(int sz, struct iov_iter *iter, + const struct virtio_net_hdr *hdr) +{ + if (unlikely(iov_iter_count(iter) < sz)) + return -EINVAL; + + if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) + return -EFAULT; + + iov_iter_advance(iter, sz - sizeof(*hdr)); + + return 0; +} + +int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, + const struct virtio_net_hdr *hdr) +{ + return virtio_net_hdr_to_skb(skb, hdr, tun_is_little_endian(flags)); +} + +int tun_vnet_hdr_from_skb(unsigned int flags, + const struct net_device *dev, + const struct sk_buff *skb, + struct virtio_net_hdr *hdr) +{ + int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; + + if (virtio_net_hdr_from_skb(skb, hdr, + tun_is_little_endian(flags), true, + vlan_hlen)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + if (net_ratelimit()) { + netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", + sinfo->gso_type, tun16_to_cpu(flags, hdr->gso_size), + tun16_to_cpu(flags, hdr->hdr_len)); + print_hex_dump(KERN_ERR, "tun: ", + DUMP_PREFIX_NONE, + 16, 1, skb->head, + min(tun16_to_cpu(flags, hdr->hdr_len), 64), true); + } + WARN_ON_ONCE(1); + return -EINVAL; + } + + return 0; +} diff --git a/drivers/net/tun_vnet.h b/drivers/net/tun_vnet.h new file mode 100644 index 000000000000..a8d6e4749333 --- /dev/null +++ b/drivers/net/tun_vnet.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef TUN_VNET_H +#define TUN_VNET_H + +#include <linux/if_tun.h> +#include <linux/virtio_net.h> + +long tun_vnet_ioctl(int *sz, unsigned int *flags, + unsigned int cmd, int __user *sp); + +int tun_vnet_hdr_get(int sz, unsigned int flags, struct iov_iter *from, + struct virtio_net_hdr *hdr); + +int tun_vnet_hdr_put(int sz, struct iov_iter *iter, + const struct virtio_net_hdr *hdr); + +int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, + const struct virtio_net_hdr *hdr); + +int tun_vnet_hdr_from_skb(unsigned int flags, + const struct net_device *dev, + const struct sk_buff *skb, + struct virtio_net_hdr *hdr); + +#endif /* TUN_VNET_H */
Akihiko Odaki wrote:
The vnet handling code will be reused by tap.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
MAINTAINERS | 2 +- drivers/net/Makefile | 3 +- drivers/net/tun.c | 174 +----------------------------------------------- drivers/net/tun_vnet.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/net/tun_vnet.h | 25 +++++++ 5 files changed, 205 insertions(+), 174 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS index 910305c11e8a..bc32b7e23c79 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23902,7 +23902,7 @@ W: http://vtun.sourceforge.net/tun F: Documentation/networking/tuntap.rst F: arch/um/os-Linux/drivers/ F: drivers/net/tap.c -F: drivers/net/tun.c +F: drivers/net/tun* TURBOCHANNEL SUBSYSTEM M: "Maciej W. Rozycki" macro@orcam.me.uk diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 13743d0e83b5..bb8eb3053772 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,7 +29,8 @@ obj-y += mdio/ obj-y += pcs/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ -obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TUN) += tun-drv.o +tun-drv-y := tun.o tun_vnet.o obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
Let's figure out how to do this. See also comment in 9/9.
Otherwise the rest of the patch looks fine.
tap_get_user() used to track the length of iov_iter with another variable. We can use iov_iter_count() to determine the current length to avoid such chores.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 5aa41d5f7765..061c2f27dfc8 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -641,7 +641,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, struct sk_buff *skb; struct tap_dev *tap; unsigned long total_len = iov_iter_count(from); - unsigned long len = total_len; + unsigned long len; int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; @@ -655,9 +655,8 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
err = -EINVAL; - if (len < vnet_hdr_len) + if (iov_iter_count(from) < vnet_hdr_len) goto err; - len -= vnet_hdr_len;
err = -EFAULT; if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) @@ -671,10 +670,12 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, tap16_to_cpu(q, vnet_hdr.csum_start) + tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); err = -EINVAL; - if (tap16_to_cpu(q, vnet_hdr.hdr_len) > len) + if (tap16_to_cpu(q, vnet_hdr.hdr_len) > iov_iter_count(from)) goto err; }
+ len = iov_iter_count(from); + err = -EINVAL; if (unlikely(len < ETH_HLEN)) goto err;
hdr_len is repeatedly used so keep it in a local variable.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/tap.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 061c2f27dfc8..7ee2e9ee2a89 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -645,6 +645,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; + int hdr_len = 0; int copylen = 0; int depth; bool zerocopy = false; @@ -672,6 +673,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, err = -EINVAL; if (tap16_to_cpu(q, vnet_hdr.hdr_len) > iov_iter_count(from)) goto err; + hdr_len = tap16_to_cpu(q, vnet_hdr.hdr_len); }
len = iov_iter_count(from); @@ -683,11 +685,8 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i;
- copylen = vnet_hdr.hdr_len ? - tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; - else if (copylen < ETH_HLEN) + copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear); + if (copylen < ETH_HLEN) copylen = ETH_HLEN; linear = copylen; i = *from; @@ -698,11 +697,9 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
if (!zerocopy) { copylen = len; - linear = tap16_to_cpu(q, vnet_hdr.hdr_len); - if (linear > good_linear) - linear = good_linear; - else if (linear < ETH_HLEN) - linear = ETH_HLEN; + linear = min(hdr_len, good_linear); + if (copylen < ETH_HLEN) + copylen = ETH_HLEN; }
skb = tap_alloc_skb(&q->sk, TAP_RESERVE, copylen,
tun and tap implements the same vnet-related features so reuse the code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com --- drivers/net/Kconfig | 1 + drivers/net/Makefile | 6 +- drivers/net/tap.c | 152 +++++-------------------------------------------- drivers/net/tun_vnet.c | 5 ++ 4 files changed, 24 insertions(+), 140 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1fd5acdc73c6..c420418473fc 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -395,6 +395,7 @@ config TUN tristate "Universal TUN/TAP device driver support" depends on INET select CRC32 + select TAP help TUN/TAP provides packet reception and transmission for user space programs. It can be viewed as a simple Point-to-Point or Ethernet diff --git a/drivers/net/Makefile b/drivers/net/Makefile index bb8eb3053772..2275309a97ee 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,9 +29,9 @@ obj-y += mdio/ obj-y += pcs/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ -obj-$(CONFIG_TUN) += tun-drv.o -tun-drv-y := tun.o tun_vnet.o -obj-$(CONFIG_TAP) += tap.o +obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TAP) += tap-drv.o +tap-drv-y := tap.o tun_vnet.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan/ diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 7ee2e9ee2a89..4f3cc3b2e3c6 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -26,74 +26,9 @@ #include <linux/virtio_net.h> #include <linux/skb_array.h>
-#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) - -#define TAP_VNET_LE 0x80000000 -#define TAP_VNET_BE 0x40000000 - -#ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool tap_legacy_is_little_endian(struct tap_queue *q) -{ - return q->flags & TAP_VNET_BE ? false : - virtio_legacy_is_little_endian(); -} - -static long tap_get_vnet_be(struct tap_queue *q, int __user *sp) -{ - int s = !!(q->flags & TAP_VNET_BE); - - if (put_user(s, sp)) - return -EFAULT; - - return 0; -} - -static long tap_set_vnet_be(struct tap_queue *q, int __user *sp) -{ - int s; - - if (get_user(s, sp)) - return -EFAULT; - - if (s) - q->flags |= TAP_VNET_BE; - else - q->flags &= ~TAP_VNET_BE; - - return 0; -} -#else -static inline bool tap_legacy_is_little_endian(struct tap_queue *q) -{ - return virtio_legacy_is_little_endian(); -} - -static long tap_get_vnet_be(struct tap_queue *q, int __user *argp) -{ - return -EINVAL; -} - -static long tap_set_vnet_be(struct tap_queue *q, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */ - -static inline bool tap_is_little_endian(struct tap_queue *q) -{ - return q->flags & TAP_VNET_LE || - tap_legacy_is_little_endian(q); -} - -static inline u16 tap16_to_cpu(struct tap_queue *q, __virtio16 val) -{ - return __virtio16_to_cpu(tap_is_little_endian(q), val); -} +#include "tun_vnet.h"
-static inline __virtio16 cpu_to_tap16(struct tap_queue *q, u16 val) -{ - return __cpu_to_virtio16(tap_is_little_endian(q), val); -} +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
static struct proto tap_proto = { .name = "tap", @@ -655,25 +590,11 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
- err = -EINVAL; - if (iov_iter_count(from) < vnet_hdr_len) - goto err; - - err = -EFAULT; - if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) - goto err; - iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr)); - if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tap16_to_cpu(q, vnet_hdr.csum_start) + - tap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > - tap16_to_cpu(q, vnet_hdr.hdr_len)) - vnet_hdr.hdr_len = cpu_to_tap16(q, - tap16_to_cpu(q, vnet_hdr.csum_start) + - tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); - err = -EINVAL; - if (tap16_to_cpu(q, vnet_hdr.hdr_len) > iov_iter_count(from)) + hdr_len = tun_vnet_hdr_get(vnet_hdr_len, q->flags, from, &vnet_hdr); + if (hdr_len < 0) { + err = hdr_len; goto err; - hdr_len = tap16_to_cpu(q, vnet_hdr.hdr_len); + } }
len = iov_iter_count(from); @@ -731,8 +652,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, skb->dev = tap->dev;
if (vnet_hdr_len) { - err = virtio_net_hdr_to_skb(skb, &vnet_hdr, - tap_is_little_endian(q)); + err = tun_vnet_hdr_to_skb(q->flags, skb, &vnet_hdr); if (err) { rcu_read_unlock(); drop_reason = SKB_DROP_REASON_DEV_HDR; @@ -795,23 +715,17 @@ static ssize_t tap_put_user(struct tap_queue *q, int total;
if (q->flags & IFF_VNET_HDR) { - int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; struct virtio_net_hdr vnet_hdr;
vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); - if (iov_iter_count(iter) < vnet_hdr_len) - return -EINVAL;
- if (virtio_net_hdr_from_skb(skb, &vnet_hdr, - tap_is_little_endian(q), true, - vlan_hlen)) - BUG(); - - if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) != - sizeof(vnet_hdr)) - return -EFAULT; + ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr); + if (ret) + return ret;
- iov_iter_advance(iter, vnet_hdr_len - sizeof(vnet_hdr)); + ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr); + if (ret) + return ret; } total = vnet_hdr_len; total += skb->len; @@ -1070,42 +984,6 @@ static long tap_ioctl(struct file *file, unsigned int cmd, q->sk.sk_sndbuf = s; return 0;
- case TUNGETVNETHDRSZ: - s = q->vnet_hdr_sz; - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETHDRSZ: - if (get_user(s, sp)) - return -EFAULT; - if (s < (int)sizeof(struct virtio_net_hdr)) - return -EINVAL; - - q->vnet_hdr_sz = s; - return 0; - - case TUNGETVNETLE: - s = !!(q->flags & TAP_VNET_LE); - if (put_user(s, sp)) - return -EFAULT; - return 0; - - case TUNSETVNETLE: - if (get_user(s, sp)) - return -EFAULT; - if (s) - q->flags |= TAP_VNET_LE; - else - q->flags &= ~TAP_VNET_LE; - return 0; - - case TUNGETVNETBE: - return tap_get_vnet_be(q, sp); - - case TUNSETVNETBE: - return tap_set_vnet_be(q, sp); - case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | @@ -1149,7 +1027,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, return ret;
default: - return -EINVAL; + return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp); } }
@@ -1196,7 +1074,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) skb->protocol = eth_hdr(skb)->h_proto;
if (vnet_hdr_len) { - err = virtio_net_hdr_to_skb(skb, gso, tap_is_little_endian(q)); + err = tun_vnet_hdr_to_skb(q->flags, skb, gso); if (err) goto err_kfree; } diff --git a/drivers/net/tun_vnet.c b/drivers/net/tun_vnet.c index 5a6cbfb6eed0..960a5fa5a332 100644 --- a/drivers/net/tun_vnet.c +++ b/drivers/net/tun_vnet.c @@ -104,6 +104,7 @@ long tun_vnet_ioctl(int *sz, unsigned int *flags, return -EINVAL; } } +EXPORT_SYMBOL_GPL(tun_vnet_ioctl);
int tun_vnet_hdr_get(int sz, unsigned int flags, struct iov_iter *from, struct virtio_net_hdr *hdr) @@ -125,6 +126,7 @@ int tun_vnet_hdr_get(int sz, unsigned int flags, struct iov_iter *from,
return tun16_to_cpu(flags, hdr->hdr_len); } +EXPORT_SYMBOL_GPL(tun_vnet_hdr_get);
int tun_vnet_hdr_put(int sz, struct iov_iter *iter, const struct virtio_net_hdr *hdr) @@ -139,12 +141,14 @@ int tun_vnet_hdr_put(int sz, struct iov_iter *iter,
return 0; } +EXPORT_SYMBOL_GPL(tun_vnet_hdr_put);
int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, const struct virtio_net_hdr *hdr) { return virtio_net_hdr_to_skb(skb, hdr, tun_is_little_endian(flags)); } +EXPORT_SYMBOL_GPL(tun_vnet_hdr_to_skb);
int tun_vnet_hdr_from_skb(unsigned int flags, const struct net_device *dev, @@ -173,3 +177,4 @@ int tun_vnet_hdr_from_skb(unsigned int flags,
return 0; } +EXPORT_SYMBOL_GPL(tun_vnet_hdr_from_skb);
Akihiko Odaki wrote:
tun and tap implements the same vnet-related features so reuse the code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/Kconfig | 1 + drivers/net/Makefile | 6 +- drivers/net/tap.c | 152 +++++-------------------------------------------- drivers/net/tun_vnet.c | 5 ++ 4 files changed, 24 insertions(+), 140 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1fd5acdc73c6..c420418473fc 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -395,6 +395,7 @@ config TUN tristate "Universal TUN/TAP device driver support" depends on INET select CRC32
- select TAP help TUN/TAP provides packet reception and transmission for user space programs. It can be viewed as a simple Point-to-Point or Ethernet
diff --git a/drivers/net/Makefile b/drivers/net/Makefile index bb8eb3053772..2275309a97ee 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,9 +29,9 @@ obj-y += mdio/ obj-y += pcs/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ -obj-$(CONFIG_TUN) += tun-drv.o -tun-drv-y := tun.o tun_vnet.o -obj-$(CONFIG_TAP) += tap.o +obj-$(CONFIG_TUN) += tun.o
Is reversing the previous changes to tun.ko intentional?
Perhaps the previous approach with a new CONFIG_TUN_VNET is preferable over this. In particular over making TUN select TAP, a new dependency.
+obj-$(CONFIG_TAP) += tap-drv.o +tap-drv-y := tap.o tun_vnet.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan/
On 2025/01/17 18:23, Willem de Bruijn wrote:
Akihiko Odaki wrote:
tun and tap implements the same vnet-related features so reuse the code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/Kconfig | 1 + drivers/net/Makefile | 6 +- drivers/net/tap.c | 152 +++++-------------------------------------------- drivers/net/tun_vnet.c | 5 ++ 4 files changed, 24 insertions(+), 140 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1fd5acdc73c6..c420418473fc 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -395,6 +395,7 @@ config TUN tristate "Universal TUN/TAP device driver support" depends on INET select CRC32
- select TAP help TUN/TAP provides packet reception and transmission for user space programs. It can be viewed as a simple Point-to-Point or Ethernet
diff --git a/drivers/net/Makefile b/drivers/net/Makefile index bb8eb3053772..2275309a97ee 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,9 +29,9 @@ obj-y += mdio/ obj-y += pcs/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ -obj-$(CONFIG_TUN) += tun-drv.o -tun-drv-y := tun.o tun_vnet.o -obj-$(CONFIG_TAP) += tap.o +obj-$(CONFIG_TUN) += tun.o
Is reversing the previous changes to tun.ko intentional?
Perhaps the previous approach with a new CONFIG_TUN_VNET is preferable over this. In particular over making TUN select TAP, a new dependency.
Jason, you also commented about CONFIG_TUN_VNET for the previous version. Do you prefer the old approach, or the new one? (Or if you have another idea, please tell me.)
+obj-$(CONFIG_TAP) += tap-drv.o +tap-drv-y := tap.o tun_vnet.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan/
On Fri, Jan 17, 2025 at 6:35 PM Akihiko Odaki akihiko.odaki@daynix.com wrote:
On 2025/01/17 18:23, Willem de Bruijn wrote:
Akihiko Odaki wrote:
tun and tap implements the same vnet-related features so reuse the code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/Kconfig | 1 + drivers/net/Makefile | 6 +- drivers/net/tap.c | 152 +++++-------------------------------------------- drivers/net/tun_vnet.c | 5 ++ 4 files changed, 24 insertions(+), 140 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1fd5acdc73c6..c420418473fc 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -395,6 +395,7 @@ config TUN tristate "Universal TUN/TAP device driver support" depends on INET select CRC32
- select TAP help TUN/TAP provides packet reception and transmission for user space programs. It can be viewed as a simple Point-to-Point or Ethernet
diff --git a/drivers/net/Makefile b/drivers/net/Makefile index bb8eb3053772..2275309a97ee 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,9 +29,9 @@ obj-y += mdio/ obj-y += pcs/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ -obj-$(CONFIG_TUN) += tun-drv.o -tun-drv-y := tun.o tun_vnet.o -obj-$(CONFIG_TAP) += tap.o +obj-$(CONFIG_TUN) += tun.o
Is reversing the previous changes to tun.ko intentional?
Perhaps the previous approach with a new CONFIG_TUN_VNET is preferable over this. In particular over making TUN select TAP, a new dependency.
Jason, you also commented about CONFIG_TUN_VNET for the previous version. Do you prefer the old approach, or the new one? (Or if you have another idea, please tell me.)
Ideally, if we can make TUN select TAP that would be better. But there are some subtle differences in the multi queue implementation. We will end up with some useless code for TUN unless we can unify the multi queue logic. It might not be worth it to change the TUN's multi queue logic so having a new file seems to be better.
Thanks
+obj-$(CONFIG_TAP) += tap-drv.o +tap-drv-y := tap.o tun_vnet.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan/
On Mon, Jan 20, 2025 at 1:37 AM Jason Wang jasowang@redhat.com wrote:
On Fri, Jan 17, 2025 at 6:35 PM Akihiko Odaki akihiko.odaki@daynix.com wrote:
On 2025/01/17 18:23, Willem de Bruijn wrote:
Akihiko Odaki wrote:
tun and tap implements the same vnet-related features so reuse the code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/Kconfig | 1 + drivers/net/Makefile | 6 +- drivers/net/tap.c | 152 +++++-------------------------------------------- drivers/net/tun_vnet.c | 5 ++ 4 files changed, 24 insertions(+), 140 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1fd5acdc73c6..c420418473fc 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -395,6 +395,7 @@ config TUN tristate "Universal TUN/TAP device driver support" depends on INET select CRC32
- select TAP help TUN/TAP provides packet reception and transmission for user space programs. It can be viewed as a simple Point-to-Point or Ethernet
diff --git a/drivers/net/Makefile b/drivers/net/Makefile index bb8eb3053772..2275309a97ee 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,9 +29,9 @@ obj-y += mdio/ obj-y += pcs/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ -obj-$(CONFIG_TUN) += tun-drv.o -tun-drv-y := tun.o tun_vnet.o -obj-$(CONFIG_TAP) += tap.o +obj-$(CONFIG_TUN) += tun.o
Is reversing the previous changes to tun.ko intentional?
Perhaps the previous approach with a new CONFIG_TUN_VNET is preferable over this. In particular over making TUN select TAP, a new dependency.
Jason, you also commented about CONFIG_TUN_VNET for the previous version. Do you prefer the old approach, or the new one? (Or if you have another idea, please tell me.)
Ideally, if we can make TUN select TAP that would be better. But there are some subtle differences in the multi queue implementation. We will end up with some useless code for TUN unless we can unify the multi queue logic. It might not be worth it to change the TUN's multi queue logic so having a new file seems to be better.
+1 on deduplicating further. But this series is complex enough. Let's not expand that.
The latest approach with a separate .o file may have some performance cost by converting likely inlined code into real function calls. Another option is to move it all into tun_vnet.h. That also resolves the Makefile issues.
On 2025/01/20 20:19, Willem de Bruijn wrote:
On Mon, Jan 20, 2025 at 1:37 AM Jason Wang jasowang@redhat.com wrote:
On Fri, Jan 17, 2025 at 6:35 PM Akihiko Odaki akihiko.odaki@daynix.com wrote:
On 2025/01/17 18:23, Willem de Bruijn wrote:
Akihiko Odaki wrote:
tun and tap implements the same vnet-related features so reuse the code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/Kconfig | 1 + drivers/net/Makefile | 6 +- drivers/net/tap.c | 152 +++++-------------------------------------------- drivers/net/tun_vnet.c | 5 ++ 4 files changed, 24 insertions(+), 140 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1fd5acdc73c6..c420418473fc 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -395,6 +395,7 @@ config TUN tristate "Universal TUN/TAP device driver support" depends on INET select CRC32
- select TAP help TUN/TAP provides packet reception and transmission for user space programs. It can be viewed as a simple Point-to-Point or Ethernet
diff --git a/drivers/net/Makefile b/drivers/net/Makefile index bb8eb3053772..2275309a97ee 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,9 +29,9 @@ obj-y += mdio/ obj-y += pcs/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ -obj-$(CONFIG_TUN) += tun-drv.o -tun-drv-y := tun.o tun_vnet.o -obj-$(CONFIG_TAP) += tap.o +obj-$(CONFIG_TUN) += tun.o
Is reversing the previous changes to tun.ko intentional?
Perhaps the previous approach with a new CONFIG_TUN_VNET is preferable over this. In particular over making TUN select TAP, a new dependency.
Jason, you also commented about CONFIG_TUN_VNET for the previous version. Do you prefer the old approach, or the new one? (Or if you have another idea, please tell me.)
Ideally, if we can make TUN select TAP that would be better. But there are some subtle differences in the multi queue implementation. We will end up with some useless code for TUN unless we can unify the multi queue logic. It might not be worth it to change the TUN's multi queue logic so having a new file seems to be better.
+1 on deduplicating further. But this series is complex enough. Let's not expand that.
The latest approach with a separate .o file may have some performance cost by converting likely inlined code into real function calls. Another option is to move it all into tun_vnet.h. That also resolves the Makefile issues.
I measured the size difference between the latest inlining approaches. The numbers may vary depending on the system configuration of course, but they should be useful for reference.
The below shows sizes when having a separate module: 106496 bytes in total
# lsmod Module Size Used by tap 28672 0 tun 61440 0 tun_vnet 16384 2 tun,tap
The below shows sizes when inlining: 102400 bytes in total
# lsmod Module Size Used by tap 32768 0 tun 69632 0
So having a separate module costs 4096 bytes more.
These two approaches should have similar tendency for run-time and compile-time performance; the code is so trivial that the overhead of having one additional module is dominant.
The only downside of having all in tun_vnet.h is that it will expose its internal macros and functions, which I think tolerable.
Akihiko Odaki wrote:
On 2025/01/20 20:19, Willem de Bruijn wrote:
On Mon, Jan 20, 2025 at 1:37 AM Jason Wang jasowang@redhat.com wrote:
On Fri, Jan 17, 2025 at 6:35 PM Akihiko Odaki akihiko.odaki@daynix.com wrote:
On 2025/01/17 18:23, Willem de Bruijn wrote:
Akihiko Odaki wrote:
tun and tap implements the same vnet-related features so reuse the code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
drivers/net/Kconfig | 1 + drivers/net/Makefile | 6 +- drivers/net/tap.c | 152 +++++-------------------------------------------- drivers/net/tun_vnet.c | 5 ++ 4 files changed, 24 insertions(+), 140 deletions(-)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 1fd5acdc73c6..c420418473fc 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -395,6 +395,7 @@ config TUN tristate "Universal TUN/TAP device driver support" depends on INET select CRC32
- select TAP help TUN/TAP provides packet reception and transmission for user space programs. It can be viewed as a simple Point-to-Point or Ethernet
diff --git a/drivers/net/Makefile b/drivers/net/Makefile index bb8eb3053772..2275309a97ee 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,9 +29,9 @@ obj-y += mdio/ obj-y += pcs/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ -obj-$(CONFIG_TUN) += tun-drv.o -tun-drv-y := tun.o tun_vnet.o -obj-$(CONFIG_TAP) += tap.o +obj-$(CONFIG_TUN) += tun.o
Is reversing the previous changes to tun.ko intentional?
Perhaps the previous approach with a new CONFIG_TUN_VNET is preferable over this. In particular over making TUN select TAP, a new dependency.
Jason, you also commented about CONFIG_TUN_VNET for the previous version. Do you prefer the old approach, or the new one? (Or if you have another idea, please tell me.)
Ideally, if we can make TUN select TAP that would be better. But there are some subtle differences in the multi queue implementation. We will end up with some useless code for TUN unless we can unify the multi queue logic. It might not be worth it to change the TUN's multi queue logic so having a new file seems to be better.
+1 on deduplicating further. But this series is complex enough. Let's not expand that.
The latest approach with a separate .o file may have some performance cost by converting likely inlined code into real function calls. Another option is to move it all into tun_vnet.h. That also resolves the Makefile issues.
I measured the size difference between the latest inlining approaches. The numbers may vary depending on the system configuration of course, but they should be useful for reference.
The below shows sizes when having a separate module: 106496 bytes in total
# lsmod Module Size Used by tap 28672 0 tun 61440 0 tun_vnet 16384 2 tun,tap
The below shows sizes when inlining: 102400 bytes in total
# lsmod Module Size Used by tap 32768 0 tun 69632 0
So having a separate module costs 4096 bytes more.
These two approaches should have similar tendency for run-time and compile-time performance; the code is so trivial that the overhead of having one additional module is dominant.
The concern raised was not about object size, but about inlined vs true calls in the hot path.
The only downside of having all in tun_vnet.h is that it will expose its internal macros and functions, which I think tolerable.
As long as only included by tun.c and tap.c, I think that's okay. The slow path code (ioctl) could remain in a .c file. But it's simpler to just have the header file.
On Thu, Jan 16, 2025 at 05:08:03PM +0900, Akihiko Odaki wrote:
When I implemented virtio's hash-related features to tun/tap [1], I found tun/tap does not fill the entire region reserved for the virtio header, leaving some uninitialized hole in the middle of the buffer after read()/recvmesg().
This series fills the uninitialized hole. More concretely, the num_buffers field will be initialized with 1, and the other fields will be inialized with 0. Setting the num_buffers field to 1 is mandated by virtio 1.0 [2].
The change to virtio header is preceded by another change that refactors tun and tap to unify their virtio-related code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
Will review. But this does not look like net material to me. Not really a bugfix. More like net-next.
Changes in v3:
- Dropped changes to fill the vnet header.
- Splitted patch "tun: Unify vnet implementation".
- Reverted spurious changes in patch "tun: Unify vnet implementation".
- Merged tun_vnet.c into TAP.
- Link to v2: https://lore.kernel.org/r/20250109-tun-v2-0-388d7d5a287a@daynix.com
Changes in v2:
- Fixed num_buffers endian.
- Link to v1: https://lore.kernel.org/r/20250108-tun-v1-0-67d784b34374@daynix.com
Akihiko Odaki (9): tun: Refactor CONFIG_TUN_VNET_CROSS_LE tun: Avoid double-tracking iov_iter length changes tun: Keep hdr_len in tun_get_user() tun: Decouple vnet from tun_struct tun: Decouple vnet handling tun: Extract the vnet handling code tap: Avoid double-tracking iov_iter length changes tap: Keep hdr_len in tap_get_user() tap: Use tun's vnet-related code
MAINTAINERS | 2 +- drivers/net/Kconfig | 1 + drivers/net/Makefile | 3 +- drivers/net/tap.c | 172 ++++++------------------------------------ drivers/net/tun.c | 200 +++++++------------------------------------------ drivers/net/tun_vnet.c | 180 ++++++++++++++++++++++++++++++++++++++++++++ drivers/net/tun_vnet.h | 25 +++++++ 7 files changed, 260 insertions(+), 323 deletions(-)
base-commit: a32e14f8aef69b42826cf0998b068a43d486a9e9 change-id: 20241230-tun-66e10a49b0c7
Best regards,
Akihiko Odaki akihiko.odaki@daynix.com
Michael S. Tsirkin wrote:
On Thu, Jan 16, 2025 at 05:08:03PM +0900, Akihiko Odaki wrote:
When I implemented virtio's hash-related features to tun/tap [1], I found tun/tap does not fill the entire region reserved for the virtio header, leaving some uninitialized hole in the middle of the buffer after read()/recvmesg().
This series fills the uninitialized hole. More concretely, the num_buffers field will be initialized with 1, and the other fields will be inialized with 0. Setting the num_buffers field to 1 is mandated by virtio 1.0 [2].
The change to virtio header is preceded by another change that refactors tun and tap to unify their virtio-related code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
Will review. But this does not look like net material to me. Not really a bugfix. More like net-next.
+1. I said basically the same in v2.
Perhaps the field initialization is net material, though not critical until hashing is merged, so not stable.
The deduplication does not belong in net.
IMHO it should all go to net-next.
On 2025/01/16 21:54, Willem de Bruijn wrote:
Michael S. Tsirkin wrote:
On Thu, Jan 16, 2025 at 05:08:03PM +0900, Akihiko Odaki wrote:
When I implemented virtio's hash-related features to tun/tap [1], I found tun/tap does not fill the entire region reserved for the virtio header, leaving some uninitialized hole in the middle of the buffer after read()/recvmesg().
This series fills the uninitialized hole. More concretely, the num_buffers field will be initialized with 1, and the other fields will be inialized with 0. Setting the num_buffers field to 1 is mandated by virtio 1.0 [2].
The change to virtio header is preceded by another change that refactors tun and tap to unify their virtio-related code.
Signed-off-by: Akihiko Odaki akihiko.odaki@daynix.com
Will review. But this does not look like net material to me. Not really a bugfix. More like net-next.
+1. I said basically the same in v2.
Perhaps the field initialization is net material, though not critical until hashing is merged, so not stable.
The deduplication does not belong in net.
IMHO it should all go to net-next.
I will post the future version for net-next. I also intend to post the field initialization for net-next too.
linux-kselftest-mirror@lists.linaro.org