The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
The patch below does not apply to the 5.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 32c72165dbd0e246e69d16a3ad348a4851afd415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kadlecsik=20J=C3=B3zsef?= <kadlec(a)blackhole.kfki.hu>
Date: Sun, 19 Jan 2020 22:06:49 +0100
Subject: [PATCH] netfilter: ipset: use bitmap infrastructure completely
The bitmap allocation did not use full unsigned long sizes
when calculating the required size and that was triggered by KASAN
as slab-out-of-bounds read in several places. The patch fixes all
of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de(a)syzkaller.appspotmail.com
Reported-by: syzbot+827ced406c9a1d9570ed(a)syzkaller.appspotmail.com
Reported-by: syzbot+190d63957b22ef673ea5(a)syzkaller.appspotmail.com
Reported-by: syzbot+dfccdb2bdb4a12ad425e(a)syzkaller.appspotmail.com
Reported-by: syzbot+df0d0f5895ef1f41a65b(a)syzkaller.appspotmail.com
Reported-by: syzbot+b08bd19bb37513357fd4(a)syzkaller.appspotmail.com
Reported-by: syzbot+53cdd0ec0bbabd53370a(a)syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec(a)netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 4d8b1eaf7708..908d38dbcb91 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -426,13 +426,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr)
sizeof(*addr));
}
-/* Calculate the bytes required to store the inclusive range of a-b */
-static inline int
-bitmap_bytes(u32 a, u32 b)
-{
- return 4 * ((((b - a + 8) / 8) + 3) / 4);
-}
-
/* How often should the gc be run by default */
#define IPSET_GC_TIME (3 * 60)
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 077a2cb65fcb..26ab0e9612d8 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -75,7 +75,7 @@ mtype_flush(struct ip_set *set)
if (set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
- memset(map->members, 0, map->memsize);
+ bitmap_zero(map->members, map->elements);
set->elements = 0;
set->ext_size = 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index abe8f77d7d23..0a2196f59106 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -220,7 +220,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
u32 first_ip, u32 last_ip,
u32 elements, u32 hosts, u8 netmask)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -322,7 +322,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ip;
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index b618713297da..739e343efaf6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -42,7 +42,7 @@ enum {
/* Type structure */
struct bitmap_ipmac {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -299,7 +299,7 @@ static bool
init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
u32 first_ip, u32 last_ip, u32 elements)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_ip = first_ip;
@@ -360,7 +360,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (!map)
return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_ipmac;
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 23d6095cb196..b49978dd810d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
- void *members; /* the set members */
+ unsigned long *members; /* the set members */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -231,7 +231,7 @@ static bool
init_map_port(struct ip_set *set, struct bitmap_port *map,
u16 first_port, u16 last_port)
{
- map->members = ip_set_alloc(map->memsize);
+ map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN);
if (!map->members)
return false;
map->first_port = first_port;
@@ -271,7 +271,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -ENOMEM;
map->elements = elements;
- map->memsize = bitmap_bytes(0, map->elements);
+ map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long);
set->variant = &bitmap_port;
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
On Mon, 03 Feb 2020 06:51:22 +0100,
Hillf Danton wrote:
>
>
> Sun, 02 Feb 2020 04:56:10 -0800 (PST)
> > syzbot has found a reproducer for the following crash on:
> >
> > HEAD commit: 2747d5fd Add linux-next specific files for 20200116
This looks like a fairly old head, and missing the fix that is already
included in 5.5 release:
60adcfde92fa40fcb2dbf7cc52f9b096e0cd109a
ALSA: seq: Fix racy access for queue timer in proc read
thanks,
Takashi
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 09ed259fac621634d51cd986aa8d65f035662658 Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu(a)ti.com>
Date: Wed, 11 Dec 2019 10:10:03 -0600
Subject: [PATCH] usb: dwc3: turn off VBUS when leaving host mode
VBUS should be turned off when leaving the host mode.
Set GCTL_PRTCAP to device mode in teardown to de-assert DRVVBUS pin to
turn off VBUS power.
Fixes: 5f94adfeed97 ("usb: dwc3: core: refactor mode initialization to its own function")
Cc: stable(a)vger.kernel.org
Signed-off-by: Bin Liu <b-liu(a)ti.com>
Signed-off-by: Felipe Balbi <balbi(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index f561c6c9e8a9..1d85c42b9c67 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1246,6 +1246,9 @@ static void dwc3_core_exit_mode(struct dwc3 *dwc)
/* do nothing */
break;
}
+
+ /* de-assert DRVVBUS for HOST and OTG mode */
+ dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE);
}
static void dwc3_get_properties(struct dwc3 *dwc)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From e93cd35101b61e4c79149be2cfc927c4b28dc60c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan(a)kernel.org>
Date: Thu, 28 Nov 2019 18:22:00 +0100
Subject: [PATCH] rsi: fix use-after-free on failed probe and unbind
Make sure to stop both URBs before returning after failed probe as well
as on disconnect to avoid use-after-free in the completion handler.
Reported-by: syzbot+b563b7f8dbe8223a51e8(a)syzkaller.appspotmail.com
Fixes: a4302bff28e2 ("rsi: add bluetooth rx endpoint")
Fixes: dad0d04fa7ba ("rsi: Add RS9113 wireless driver")
Cc: stable <stable(a)vger.kernel.org> # 3.15
Cc: Siva Rebbagondla <siva.rebbagondla(a)redpinesignals.com>
Cc: Prameela Rani Garnepudi <prameela.j04cs(a)gmail.com>
Cc: Amitkumar Karwar <amit.karwar(a)redpinesignals.com>
Cc: Fariya Fatima <fariyaf(a)gmail.com>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
Signed-off-by: Kalle Valo <kvalo(a)codeaurora.org>
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index 53f41fc2cadf..30bed719486e 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -292,6 +292,15 @@ static void rsi_rx_done_handler(struct urb *urb)
dev_kfree_skb(rx_cb->rx_skb);
}
+static void rsi_rx_urb_kill(struct rsi_hw *adapter, u8 ep_num)
+{
+ struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
+ struct rx_usb_ctrl_block *rx_cb = &dev->rx_cb[ep_num - 1];
+ struct urb *urb = rx_cb->rx_urb;
+
+ usb_kill_urb(urb);
+}
+
/**
* rsi_rx_urb_submit() - This function submits the given URB to the USB stack.
* @adapter: Pointer to the adapter structure.
@@ -823,10 +832,13 @@ static int rsi_probe(struct usb_interface *pfunction,
if (adapter->priv->coex_mode > 1) {
status = rsi_rx_urb_submit(adapter, BT_EP);
if (status)
- goto err1;
+ goto err_kill_wlan_urb;
}
return 0;
+
+err_kill_wlan_urb:
+ rsi_rx_urb_kill(adapter, WLAN_EP);
err1:
rsi_deinit_usb_interface(adapter);
err:
@@ -857,6 +869,10 @@ static void rsi_disconnect(struct usb_interface *pfunction)
adapter->priv->bt_adapter = NULL;
}
+ if (adapter->priv->coex_mode > 1)
+ rsi_rx_urb_kill(adapter, BT_EP);
+ rsi_rx_urb_kill(adapter, WLAN_EP);
+
rsi_reset_card(adapter);
rsi_deinit_usb_interface(adapter);
rsi_91x_deinit(adapter);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 07bfd9bdf568a38d9440c607b72342036011f727 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert(a)gondor.apana.org.au>
Date: Tue, 19 Nov 2019 17:41:31 +0800
Subject: [PATCH] crypto: pcrypt - Fix user-after-free on module unload
On module unload of pcrypt we must unregister the crypto algorithms
first and then tear down the padata structure. As otherwise the
crypto algorithms are still alive and can be used while the padata
structure is being freed.
Fixes: 5068c7a883d1 ("crypto: pcrypt - Add pcrypt crypto...")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 543792e0ebf0..81bbea7f2ba6 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -362,11 +362,12 @@ static int __init pcrypt_init(void)
static void __exit pcrypt_exit(void)
{
+ crypto_unregister_template(&pcrypt_tmpl);
+
pcrypt_fini_padata(pencrypt);
pcrypt_fini_padata(pdecrypt);
kset_unregister(pcrypt_kset);
- crypto_unregister_template(&pcrypt_tmpl);
}
subsys_initcall(pcrypt_init);
commit b8511ccc75c033f6d54188ea4df7bf1e85778740 upstream.
A resource group (rdtgrp) contains a reference count (rdtgrp->waitcount)
that indicates how many waiters expect this rdtgrp to exist. Waiters
could be waiting on rdtgroup_mutex or some work sitting on a task's
workqueue for when the task returns from kernel mode or exits.
The deletion of a rdtgrp is intended to have two phases:
(1) while holding rdtgroup_mutex the necessary cleanup is done and
rdtgrp->flags is set to RDT_DELETED,
(2) after releasing the rdtgroup_mutex, the rdtgrp structure is freed
only if there are no waiters and its flag is set to RDT_DELETED. Upon
gaining access to rdtgroup_mutex or rdtgrp, a waiter is required to check
for the RDT_DELETED flag.
When unmounting the resctrl file system or deleting ctrl_mon groups,
all of the subdirectories are removed and the data structure of rdtgrp
is forcibly freed without checking rdtgrp->waitcount. If at this point
there was a waiter on rdtgrp then a use-after-free issue occurs when the
waiter starts running and accesses the rdtgrp structure it was waiting
on.
See kfree() calls in [1], [2] and [3] in these two call paths in
following scenarios:
(1) rdt_kill_sb() -> rmdir_all_sub() -> free_all_child_rdtgrp()
(2) rdtgroup_rmdir() -> rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp()
There are several scenarios that result in use-after-free issue in
following:
Scenario 1:
-----------
In Thread 1, rdtgroup_tasks_write() adds a task_work callback
move_myself(). If move_myself() is scheduled to execute after Thread 2
rdt_kill_sb() is finished, referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_tasks_write) Thread 2 (rdt_kill_sb)
------------------------------- ----------------------
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_move_task
__rdtgroup_move_task
/*
* Take an extra refcount, so rdtgrp cannot be freed
* before the call back move_myself has been invoked
*/
atomic_inc(&rdtgrp->waitcount)
/* Callback move_myself will be scheduled for later */
task_work_add(move_myself)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
mutex_lock
rmdir_all_sub
/*
* sentry and rdtgrp are freed
* without checking refcount
*/
free_all_child_rdtgrp
kfree(sentry)*[1]
kfree(rdtgrp)*[2]
mutex_unlock
/*
* Callback is scheduled to execute
* after rdt_kill_sb is finished
*/
move_myself
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [1] or [2].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
Scenario 2:
-----------
In Thread 1, rdtgroup_tasks_write() adds a task_work callback
move_myself(). If move_myself() is scheduled to execute after Thread 2
rdtgroup_rmdir() is finished, referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_tasks_write) Thread 2 (rdtgroup_rmdir)
------------------------------- -------------------------
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_move_task
__rdtgroup_move_task
/*
* Take an extra refcount, so rdtgrp cannot be freed
* before the call back move_myself has been invoked
*/
atomic_inc(&rdtgrp->waitcount)
/* Callback move_myself will be scheduled for later */
task_work_add(move_myself)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
/*
* sentry is freed without
* checking refcount
*/
kfree(sentry)*[3]
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(
&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
/*
* Callback is scheduled to execute
* after rdt_kill_sb is finished
*/
move_myself
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [3].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
If CONFIG_DEBUG_SLAB=y, Slab corruption on kmalloc-2k can be observed
like following. Note that "0x6b" is POISON_FREE after kfree(). The
corrupted bits "0x6a", "0x64" at offset 0x424 correspond to
waitcount member of struct rdtgroup which was freed:
Slab corruption (Not tainted): kmalloc-2k start=ffff9504c5b0d000, len=2048
420: 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkjkkkkkkkkkkk
Single bit error detected. Probably bad RAM.
Run memtest86+ or a similar memory test tool.
Next obj: start=ffff9504c5b0d800, len=2048
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
Slab corruption (Not tainted): kmalloc-2k start=ffff9504c58ab800, len=2048
420: 6b 6b 6b 6b 64 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkdkkkkkkkkkkk
Prev obj: start=ffff9504c58ab000, len=2048
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
Fix this by taking reference count (waitcount) of rdtgrp into account in
the two call paths that currently do not do so. Instead of always
freeing the resource group it will only be freed if there are no waiters
on it. If there are waiters, the resource group will have its flags set
to RDT_DELETED.
It will be left to the waiter to free the resource group when it starts
running and finding that it was the last waiter and the resource group
has been removed (rdtgrp->flags & RDT_DELETED) since. (1) rdt_kill_sb()
-> rmdir_all_sub() -> free_all_child_rdtgrp() (2) rdtgroup_rmdir() ->
rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp()
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
in older stable trees.
Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-2-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 841a024..db22ba0 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -2167,7 +2167,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
free_rmid(sentry->mon.rmid);
list_del(&sentry->mon.crdtgrp_list);
- kfree(sentry);
+
+ if (atomic_read(&sentry->waitcount) != 0)
+ sentry->flags = RDT_DELETED;
+ else
+ kfree(sentry);
}
}
@@ -2205,7 +2209,11 @@ static void rmdir_all_sub(void)
kernfs_remove(rdtgrp->kn);
list_del(&rdtgrp->rdtgroup_list);
- kfree(rdtgrp);
+
+ if (atomic_read(&rdtgrp->waitcount) != 0)
+ rdtgrp->flags = RDT_DELETED;
+ else
+ kfree(rdtgrp);
}
/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
update_closid_rmid(cpu_online_mask, &rdtgroup_default);
--
1.8.3.1
Commit f3ee99087c8ca0ecfdd549ef5a94f557c42d5428 ("ASoC: tegra: Allow
24bit and 32bit samples") added 24-bit and 32-bit support for to the
Tegra30 I2S driver. However, there are two additional commits that are
also needed to get 24-bit and 32-bit support to work correctly. These
commits are not yet applied because there are still some review comments
that need to be addressed. With only this change applied, 24-bit and
32-bit support is advertised by the I2S driver, but it does not work and
the audio is distorted. Therefore, revert this patch for now until the
other changes are also ready.
Furthermore, a clock issue with 24-bit support has been identified with
this change and so if we revert this now, we can also fix that in the
updated version.
Cc: stable(a)vger.kernel.org
Reported-by: Dmitry Osipenko <digetx(a)gmail.com>
Signed-off-by: Jon Hunter <jonathanh(a)nvidia.com>
---
sound/soc/tegra/tegra30_i2s.c | 25 +++++--------------------
1 file changed, 5 insertions(+), 20 deletions(-)
diff --git a/sound/soc/tegra/tegra30_i2s.c b/sound/soc/tegra/tegra30_i2s.c
index dbed3c5408e7..d59882ec48f1 100644
--- a/sound/soc/tegra/tegra30_i2s.c
+++ b/sound/soc/tegra/tegra30_i2s.c
@@ -127,7 +127,7 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream,
struct device *dev = dai->dev;
struct tegra30_i2s *i2s = snd_soc_dai_get_drvdata(dai);
unsigned int mask, val, reg;
- int ret, sample_size, srate, i2sclock, bitcnt, audio_bits;
+ int ret, sample_size, srate, i2sclock, bitcnt;
struct tegra30_ahub_cif_conf cif_conf;
if (params_channels(params) != 2)
@@ -137,19 +137,8 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream,
switch (params_format(params)) {
case SNDRV_PCM_FORMAT_S16_LE:
val = TEGRA30_I2S_CTRL_BIT_SIZE_16;
- audio_bits = TEGRA30_AUDIOCIF_BITS_16;
sample_size = 16;
break;
- case SNDRV_PCM_FORMAT_S24_LE:
- val = TEGRA30_I2S_CTRL_BIT_SIZE_24;
- audio_bits = TEGRA30_AUDIOCIF_BITS_24;
- sample_size = 24;
- break;
- case SNDRV_PCM_FORMAT_S32_LE:
- val = TEGRA30_I2S_CTRL_BIT_SIZE_32;
- audio_bits = TEGRA30_AUDIOCIF_BITS_32;
- sample_size = 32;
- break;
default:
return -EINVAL;
}
@@ -181,8 +170,8 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream,
cif_conf.threshold = 0;
cif_conf.audio_channels = 2;
cif_conf.client_channels = 2;
- cif_conf.audio_bits = audio_bits;
- cif_conf.client_bits = audio_bits;
+ cif_conf.audio_bits = TEGRA30_AUDIOCIF_BITS_16;
+ cif_conf.client_bits = TEGRA30_AUDIOCIF_BITS_16;
cif_conf.expand = 0;
cif_conf.stereo_conv = 0;
cif_conf.replicate = 0;
@@ -317,18 +306,14 @@ static const struct snd_soc_dai_driver tegra30_i2s_dai_template = {
.channels_min = 2,
.channels_max = 2,
.rates = SNDRV_PCM_RATE_8000_96000,
- .formats = SNDRV_PCM_FMTBIT_S32_LE |
- SNDRV_PCM_FMTBIT_S24_LE |
- SNDRV_PCM_FMTBIT_S16_LE,
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
},
.capture = {
.stream_name = "Capture",
.channels_min = 2,
.channels_max = 2,
.rates = SNDRV_PCM_RATE_8000_96000,
- .formats = SNDRV_PCM_FMTBIT_S32_LE |
- SNDRV_PCM_FMTBIT_S24_LE |
- SNDRV_PCM_FMTBIT_S16_LE,
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
},
.ops = &tegra30_i2s_dai_ops,
.symmetric_rates = 1,
--
2.17.1
commit 334b0f4e9b1b4a1d475f803419d202f6c5e4d18e upstream.
There is a race condition which results in a deadlock when rmdir and
mkdir execute concurrently:
$ ls /sys/fs/resctrl/c1/mon_groups/m1/
cpus cpus_list mon_data tasks
Thread 1: rmdir /sys/fs/resctrl/c1
Thread 2: mkdir /sys/fs/resctrl/c1/mon_groups/m1
3 locks held by mkdir/48649:
#0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50
#1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c13b>] filename_create+0x7b/0x170
#2: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70
4 locks held by rmdir/48652:
#0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50
#1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c3cf>] do_rmdir+0x13f/0x1e0
#2: (&type->i_mutex_dir_key#8){++++}, at: [<ffffffffb4c86d5d>] vfs_rmdir+0x4d/0x120
#3: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70
Thread 1 is deleting control group "c1". Holding rdtgroup_mutex,
kernfs_remove() removes all kernfs nodes under directory "c1"
recursively, then waits for sub kernfs node "mon_groups" to drop active
reference.
Thread 2 is trying to create a subdirectory "m1" in the "mon_groups"
directory. The wrapper kernfs_iop_mkdir() takes an active reference to
the "mon_groups" directory but the code drops the active reference to
the parent directory "c1" instead.
As a result, Thread 1 is blocked on waiting for active reference to drop
and never release rdtgroup_mutex, while Thread 2 is also blocked on
trying to get rdtgroup_mutex.
Thread 1 (rdtgroup_rmdir) Thread 2 (rdtgroup_mkdir)
(rmdir /sys/fs/resctrl/c1) (mkdir /sys/fs/resctrl/c1/mon_groups/m1)
------------------------- -------------------------
kernfs_iop_mkdir
/*
* kn: "m1", parent_kn: "mon_groups",
* prgrp_kn: parent_kn->parent: "c1",
*
* "mon_groups", parent_kn->active++: 1
*/
kernfs_get_active(parent_kn)
kernfs_iop_rmdir
/* "c1", kn->active++ */
kernfs_get_active(kn)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
/* "c1", kn->active-- */
kernfs_break_active_protection(kn)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
sentry->flags = RDT_DELETED
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
kernfs_get(kn)
kernfs_remove(rdtgrp->kn)
__kernfs_remove
/* "mon_groups", sub_kn */
atomic_add(KN_DEACTIVATED_BIAS, &sub_kn->active)
kernfs_drain(sub_kn)
/*
* sub_kn->active == KN_DEACTIVATED_BIAS + 1,
* waiting on sub_kn->active to drop, but it
* never drops in Thread 2 which is blocked
* on getting rdtgroup_mutex.
*/
Thread 1 hangs here ---->
wait_event(sub_kn->active == KN_DEACTIVATED_BIAS)
...
rdtgroup_mkdir
rdtgroup_mkdir_mon(parent_kn, prgrp_kn)
mkdir_rdt_prepare(parent_kn, prgrp_kn)
rdtgroup_kn_lock_live(prgrp_kn)
atomic_inc(&rdtgrp->waitcount)
/*
* "c1", prgrp_kn->active--
*
* The active reference on "c1" is
* dropped, but not matching the
* actual active reference taken
* on "mon_groups", thus causing
* Thread 1 to wait forever while
* holding rdtgroup_mutex.
*/
kernfs_break_active_protection(
prgrp_kn)
/*
* Trying to get rdtgroup_mutex
* which is held by Thread 1.
*/
Thread 2 hangs here ----> mutex_lock
...
The problem is that the creation of a subdirectory in the "mon_groups"
directory incorrectly releases the active protection of its parent
directory instead of itself before it starts waiting for rdtgroup_mutex.
This is triggered by the rdtgroup_mkdir() flow calling
rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() with kernfs node of the
parent control group ("c1") as argument. It should be called with kernfs
node "mon_groups" instead. What is currently missing is that the
kn->priv of "mon_groups" is NULL instead of pointing to the rdtgrp.
Fix it by pointing kn->priv to rdtgrp when "mon_groups" is created. Then
it could be passed to rdtgroup_kn_lock_live()/rdtgroup_kn_unlock()
instead. And then it operates on the same rdtgroup structure but handles
the active reference of kernfs node "mon_groups" to prevent deadlock.
The same changes are also made to the "mon_data" directories.
This results in some unused function parameters that will be cleaned up
in follow-up patch as the focus here is on the fix only in support of
backporting efforts.
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
for older stable trees.
Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-4-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 77770ca..11c5acc 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -2005,7 +2005,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
- NULL, "mon_groups",
+ &rdtgroup_default, "mon_groups",
&kn_mongrp);
if (ret) {
dentry = ERR_PTR(ret);
@@ -2415,7 +2415,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
/*
* Create the mon_data directory first.
*/
- ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
+ ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
if (ret)
return ret;
@@ -2568,7 +2568,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
uint files = 0;
int ret;
- prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+ prdtgrp = rdtgroup_kn_lock_live(parent_kn);
rdt_last_cmd_clear();
if (!prdtgrp) {
ret = -ENODEV;
@@ -2643,7 +2643,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
kernfs_activate(kn);
/*
- * The caller unlocks the prgrp_kn upon success.
+ * The caller unlocks the parent_kn upon success.
*/
return 0;
@@ -2654,7 +2654,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
out_free_rgrp:
kfree(rdtgrp);
out_unlock:
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -2692,7 +2692,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
*/
list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -2735,7 +2735,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
* Create an empty mon_groups directory to hold the subset
* of tasks and cpus to monitor.
*/
- ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
+ ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
if (ret) {
rdt_last_cmd_puts("kernfs subdir error\n");
goto out_del_list;
@@ -2751,7 +2751,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
out_common_fail:
mkdir_rdt_prepare_clean(rdtgrp);
out_unlock:
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
--
1.8.3.1
commit 074fadee59ee7a9d2b216e9854bd4efb5dad679f upstream.
There is a race condition in the following scenario which results in an
use-after-free issue when reading a monitoring file and deleting the
parent ctrl_mon group concurrently:
Thread 1 calls atomic_inc() to take refcount of rdtgrp and then calls
kernfs_break_active_protection() to drop the active reference of kernfs
node in rdtgroup_kn_lock_live().
In Thread 2, kernfs_remove() is a blocking routine. It waits on all sub
kernfs nodes to drop the active reference when removing all subtree
kernfs nodes recursively. Thread 2 could block on kernfs_remove() until
Thread 1 calls kernfs_break_active_protection(). Only after
kernfs_remove() completes the refcount of rdtgrp could be trusted.
Before Thread 1 calls atomic_inc() and kernfs_break_active_protection(),
Thread 2 could call kfree() when the refcount of rdtgrp (sentry) is 0
instead of 1 due to the race.
In Thread 1, in rdtgroup_kn_unlock(), referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_mondata_show) Thread 2 (rdtgroup_rmdir)
-------------------------------- -------------------------
rdtgroup_kn_lock_live
/*
* kn active protection until
* kernfs_break_active_protection(kn)
*/
rdtgrp = kernfs_to_rdtgroup(kn)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
/*
* sentry->waitcount should be 1
* but is 0 now due to the race.
*/
kfree(sentry)*[1]
/*
* Only after kernfs_remove()
* completes, the refcount of
* rdtgrp could be trusted.
*/
atomic_inc(&rdtgrp->waitcount)
/* kn->active-- */
kernfs_break_active_protection(kn)
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
/*
* Blocking routine, wait for
* all sub kernfs nodes to drop
* active reference in
* kernfs_break_active_protection.
*/
kernfs_remove(rdtgrp->kn)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(
&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kernfs_unbreak_active_protection(kn)
kfree(rdtgrp)
mutex_lock
mon_event_read
rdtgroup_kn_unlock
mutex_unlock
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [1].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
/* kn->active++ */
kernfs_unbreak_active_protection(kn)
kfree(rdtgrp)
Fix it by moving free_all_child_rdtgrp() to after kernfs_remove() in
rdtgroup_rmdir_ctrl() to ensure it has the accurate refcount of rdtgrp.
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
for older stable trees.
Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-3-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index db22ba0..77770ca 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -2877,13 +2877,13 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
closid_free(rdtgrp->closid);
free_rmid(rdtgrp->mon.rmid);
+ rdtgroup_ctrl_remove(kn, rdtgrp);
+
/*
* Free all the child monitor group rmids.
*/
free_all_child_rdtgrp(rdtgrp);
- rdtgroup_ctrl_remove(kn, rdtgrp);
-
return 0;
}
--
1.8.3.1
commit 334b0f4e9b1b4a1d475f803419d202f6c5e4d18e upstream.
There is a race condition which results in a deadlock when rmdir and
mkdir execute concurrently:
$ ls /sys/fs/resctrl/c1/mon_groups/m1/
cpus cpus_list mon_data tasks
Thread 1: rmdir /sys/fs/resctrl/c1
Thread 2: mkdir /sys/fs/resctrl/c1/mon_groups/m1
3 locks held by mkdir/48649:
#0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50
#1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c13b>] filename_create+0x7b/0x170
#2: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70
4 locks held by rmdir/48652:
#0: (sb_writers#17){.+.+}, at: [<ffffffffb4ca2aa0>] mnt_want_write+0x20/0x50
#1: (&type->i_mutex_dir_key#8/1){+.+.}, at: [<ffffffffb4c8c3cf>] do_rmdir+0x13f/0x1e0
#2: (&type->i_mutex_dir_key#8){++++}, at: [<ffffffffb4c86d5d>] vfs_rmdir+0x4d/0x120
#3: (rdtgroup_mutex){+.+.}, at: [<ffffffffb4a4389d>] rdtgroup_kn_lock_live+0x3d/0x70
Thread 1 is deleting control group "c1". Holding rdtgroup_mutex,
kernfs_remove() removes all kernfs nodes under directory "c1"
recursively, then waits for sub kernfs node "mon_groups" to drop active
reference.
Thread 2 is trying to create a subdirectory "m1" in the "mon_groups"
directory. The wrapper kernfs_iop_mkdir() takes an active reference to
the "mon_groups" directory but the code drops the active reference to
the parent directory "c1" instead.
As a result, Thread 1 is blocked on waiting for active reference to drop
and never release rdtgroup_mutex, while Thread 2 is also blocked on
trying to get rdtgroup_mutex.
Thread 1 (rdtgroup_rmdir) Thread 2 (rdtgroup_mkdir)
(rmdir /sys/fs/resctrl/c1) (mkdir /sys/fs/resctrl/c1/mon_groups/m1)
------------------------- -------------------------
kernfs_iop_mkdir
/*
* kn: "m1", parent_kn: "mon_groups",
* prgrp_kn: parent_kn->parent: "c1",
*
* "mon_groups", parent_kn->active++: 1
*/
kernfs_get_active(parent_kn)
kernfs_iop_rmdir
/* "c1", kn->active++ */
kernfs_get_active(kn)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
/* "c1", kn->active-- */
kernfs_break_active_protection(kn)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
sentry->flags = RDT_DELETED
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
kernfs_get(kn)
kernfs_remove(rdtgrp->kn)
__kernfs_remove
/* "mon_groups", sub_kn */
atomic_add(KN_DEACTIVATED_BIAS, &sub_kn->active)
kernfs_drain(sub_kn)
/*
* sub_kn->active == KN_DEACTIVATED_BIAS + 1,
* waiting on sub_kn->active to drop, but it
* never drops in Thread 2 which is blocked
* on getting rdtgroup_mutex.
*/
Thread 1 hangs here ---->
wait_event(sub_kn->active == KN_DEACTIVATED_BIAS)
...
rdtgroup_mkdir
rdtgroup_mkdir_mon(parent_kn, prgrp_kn)
mkdir_rdt_prepare(parent_kn, prgrp_kn)
rdtgroup_kn_lock_live(prgrp_kn)
atomic_inc(&rdtgrp->waitcount)
/*
* "c1", prgrp_kn->active--
*
* The active reference on "c1" is
* dropped, but not matching the
* actual active reference taken
* on "mon_groups", thus causing
* Thread 1 to wait forever while
* holding rdtgroup_mutex.
*/
kernfs_break_active_protection(
prgrp_kn)
/*
* Trying to get rdtgroup_mutex
* which is held by Thread 1.
*/
Thread 2 hangs here ----> mutex_lock
...
The problem is that the creation of a subdirectory in the "mon_groups"
directory incorrectly releases the active protection of its parent
directory instead of itself before it starts waiting for rdtgroup_mutex.
This is triggered by the rdtgroup_mkdir() flow calling
rdtgroup_kn_lock_live()/rdtgroup_kn_unlock() with kernfs node of the
parent control group ("c1") as argument. It should be called with kernfs
node "mon_groups" instead. What is currently missing is that the
kn->priv of "mon_groups" is NULL instead of pointing to the rdtgrp.
Fix it by pointing kn->priv to rdtgrp when "mon_groups" is created. Then
it could be passed to rdtgroup_kn_lock_live()/rdtgroup_kn_unlock()
instead. And then it operates on the same rdtgroup structure but handles
the active reference of kernfs node "mon_groups" to prevent deadlock.
The same changes are also made to the "mon_data" directories.
This results in some unused function parameters that will be cleaned up
in follow-up patch as the focus here is on the fix only in support of
backporting efforts.
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
for older stable trees.
Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-4-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 0157496..0ec30b2 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1107,7 +1107,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn,
- NULL, "mon_groups",
+ &rdtgroup_default, "mon_groups",
&kn_mongrp);
if (ret) {
dentry = ERR_PTR(ret);
@@ -1499,7 +1499,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
/*
* Create the mon_data directory first.
*/
- ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
+ ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
if (ret)
return ret;
@@ -1533,7 +1533,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
uint files = 0;
int ret;
- prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+ prdtgrp = rdtgroup_kn_lock_live(parent_kn);
if (!prdtgrp) {
ret = -ENODEV;
goto out_unlock;
@@ -1589,7 +1589,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
kernfs_activate(kn);
/*
- * The caller unlocks the prgrp_kn upon success.
+ * The caller unlocks the parent_kn upon success.
*/
return 0;
@@ -1600,7 +1600,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
out_free_rgrp:
kfree(rdtgrp);
out_unlock:
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -1638,7 +1638,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
*/
list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
@@ -1675,7 +1675,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
* Create an empty mon_groups directory to hold the subset
* of tasks and cpus to monitor.
*/
- ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
+ ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
if (ret)
goto out_id_free;
}
@@ -1688,7 +1688,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
out_common_fail:
mkdir_rdt_prepare_clean(rdtgrp);
out_unlock:
- rdtgroup_kn_unlock(prgrp_kn);
+ rdtgroup_kn_unlock(parent_kn);
return ret;
}
--
1.8.3.1
commit 074fadee59ee7a9d2b216e9854bd4efb5dad679f upstream.
There is a race condition in the following scenario which results in an
use-after-free issue when reading a monitoring file and deleting the
parent ctrl_mon group concurrently:
Thread 1 calls atomic_inc() to take refcount of rdtgrp and then calls
kernfs_break_active_protection() to drop the active reference of kernfs
node in rdtgroup_kn_lock_live().
In Thread 2, kernfs_remove() is a blocking routine. It waits on all sub
kernfs nodes to drop the active reference when removing all subtree
kernfs nodes recursively. Thread 2 could block on kernfs_remove() until
Thread 1 calls kernfs_break_active_protection(). Only after
kernfs_remove() completes the refcount of rdtgrp could be trusted.
Before Thread 1 calls atomic_inc() and kernfs_break_active_protection(),
Thread 2 could call kfree() when the refcount of rdtgrp (sentry) is 0
instead of 1 due to the race.
In Thread 1, in rdtgroup_kn_unlock(), referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_mondata_show) Thread 2 (rdtgroup_rmdir)
-------------------------------- -------------------------
rdtgroup_kn_lock_live
/*
* kn active protection until
* kernfs_break_active_protection(kn)
*/
rdtgrp = kernfs_to_rdtgroup(kn)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
/*
* sentry->waitcount should be 1
* but is 0 now due to the race.
*/
kfree(sentry)*[1]
/*
* Only after kernfs_remove()
* completes, the refcount of
* rdtgrp could be trusted.
*/
atomic_inc(&rdtgrp->waitcount)
/* kn->active-- */
kernfs_break_active_protection(kn)
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
/*
* Blocking routine, wait for
* all sub kernfs nodes to drop
* active reference in
* kernfs_break_active_protection.
*/
kernfs_remove(rdtgrp->kn)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(
&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kernfs_unbreak_active_protection(kn)
kfree(rdtgrp)
mutex_lock
mon_event_read
rdtgroup_kn_unlock
mutex_unlock
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [1].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
/* kn->active++ */
kernfs_unbreak_active_protection(kn)
kfree(rdtgrp)
Fix it by moving free_all_child_rdtgrp() to after kernfs_remove() in
rdtgroup_rmdir_ctrl() to ensure it has the accurate refcount of rdtgrp.
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
for older stable trees.
Upstream commit 17eafd076291 ("x86/intel_rdt: Split resource group
removal in two") moved part of resource group removal code from
rdtgroup_rmdir_mon() into a separate function rdtgroup_ctrl_remove().
Apply the change against original code base of rdtgroup_rmdir_mon() for
older stable trees.
Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-3-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 7349969..0157496 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1800,11 +1800,6 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
closid_free(rdtgrp->closid);
free_rmid(rdtgrp->mon.rmid);
- /*
- * Free all the child monitor group rmids.
- */
- free_all_child_rdtgrp(rdtgrp);
-
list_del(&rdtgrp->rdtgroup_list);
/*
@@ -1814,6 +1809,11 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
kernfs_get(kn);
kernfs_remove(rdtgrp->kn);
+ /*
+ * Free all the child monitor group rmids.
+ */
+ free_all_child_rdtgrp(rdtgrp);
+
return 0;
}
--
1.8.3.1
commit b8511ccc75c033f6d54188ea4df7bf1e85778740 upstream.
A resource group (rdtgrp) contains a reference count (rdtgrp->waitcount)
that indicates how many waiters expect this rdtgrp to exist. Waiters
could be waiting on rdtgroup_mutex or some work sitting on a task's
workqueue for when the task returns from kernel mode or exits.
The deletion of a rdtgrp is intended to have two phases:
(1) while holding rdtgroup_mutex the necessary cleanup is done and
rdtgrp->flags is set to RDT_DELETED,
(2) after releasing the rdtgroup_mutex, the rdtgrp structure is freed
only if there are no waiters and its flag is set to RDT_DELETED. Upon
gaining access to rdtgroup_mutex or rdtgrp, a waiter is required to check
for the RDT_DELETED flag.
When unmounting the resctrl file system or deleting ctrl_mon groups,
all of the subdirectories are removed and the data structure of rdtgrp
is forcibly freed without checking rdtgrp->waitcount. If at this point
there was a waiter on rdtgrp then a use-after-free issue occurs when the
waiter starts running and accesses the rdtgrp structure it was waiting
on.
See kfree() calls in [1], [2] and [3] in these two call paths in
following scenarios:
(1) rdt_kill_sb() -> rmdir_all_sub() -> free_all_child_rdtgrp()
(2) rdtgroup_rmdir() -> rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp()
There are several scenarios that result in use-after-free issue in
following:
Scenario 1:
-----------
In Thread 1, rdtgroup_tasks_write() adds a task_work callback
move_myself(). If move_myself() is scheduled to execute after Thread 2
rdt_kill_sb() is finished, referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_tasks_write) Thread 2 (rdt_kill_sb)
------------------------------- ----------------------
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_move_task
__rdtgroup_move_task
/*
* Take an extra refcount, so rdtgrp cannot be freed
* before the call back move_myself has been invoked
*/
atomic_inc(&rdtgrp->waitcount)
/* Callback move_myself will be scheduled for later */
task_work_add(move_myself)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
mutex_lock
rmdir_all_sub
/*
* sentry and rdtgrp are freed
* without checking refcount
*/
free_all_child_rdtgrp
kfree(sentry)*[1]
kfree(rdtgrp)*[2]
mutex_unlock
/*
* Callback is scheduled to execute
* after rdt_kill_sb is finished
*/
move_myself
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [1] or [2].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
Scenario 2:
-----------
In Thread 1, rdtgroup_tasks_write() adds a task_work callback
move_myself(). If move_myself() is scheduled to execute after Thread 2
rdtgroup_rmdir() is finished, referring to earlier rdtgrp memory
(rdtgrp->waitcount) which was already freed in Thread 2 results in
use-after-free issue.
Thread 1 (rdtgroup_tasks_write) Thread 2 (rdtgroup_rmdir)
------------------------------- -------------------------
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_move_task
__rdtgroup_move_task
/*
* Take an extra refcount, so rdtgrp cannot be freed
* before the call back move_myself has been invoked
*/
atomic_inc(&rdtgrp->waitcount)
/* Callback move_myself will be scheduled for later */
task_work_add(move_myself)
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
rdtgroup_kn_lock_live
atomic_inc(&rdtgrp->waitcount)
mutex_lock
rdtgroup_rmdir_ctrl
free_all_child_rdtgrp
/*
* sentry is freed without
* checking refcount
*/
kfree(sentry)*[3]
rdtgroup_ctrl_remove
rdtgrp->flags = RDT_DELETED
rdtgroup_kn_unlock
mutex_unlock
atomic_dec_and_test(
&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
/*
* Callback is scheduled to execute
* after rdt_kill_sb is finished
*/
move_myself
/*
* Use-after-free: refer to earlier rdtgrp
* memory which was freed in [3].
*/
atomic_dec_and_test(&rdtgrp->waitcount)
&& (flags & RDT_DELETED)
kfree(rdtgrp)
If CONFIG_DEBUG_SLAB=y, Slab corruption on kmalloc-2k can be observed
like following. Note that "0x6b" is POISON_FREE after kfree(). The
corrupted bits "0x6a", "0x64" at offset 0x424 correspond to
waitcount member of struct rdtgroup which was freed:
Slab corruption (Not tainted): kmalloc-2k start=ffff9504c5b0d000, len=2048
420: 6b 6b 6b 6b 6a 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkjkkkkkkkkkkk
Single bit error detected. Probably bad RAM.
Run memtest86+ or a similar memory test tool.
Next obj: start=ffff9504c5b0d800, len=2048
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
Slab corruption (Not tainted): kmalloc-2k start=ffff9504c58ab800, len=2048
420: 6b 6b 6b 6b 64 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkdkkkkkkkkkkk
Prev obj: start=ffff9504c58ab000, len=2048
000: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
010: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
Fix this by taking reference count (waitcount) of rdtgrp into account in
the two call paths that currently do not do so. Instead of always
freeing the resource group it will only be freed if there are no waiters
on it. If there are waiters, the resource group will have its flags set
to RDT_DELETED.
It will be left to the waiter to free the resource group when it starts
running and finding that it was the last waiter and the resource group
has been removed (rdtgrp->flags & RDT_DELETED) since. (1) rdt_kill_sb()
-> rmdir_all_sub() -> free_all_child_rdtgrp() (2) rdtgroup_rmdir() ->
rdtgroup_rmdir_ctrl() -> free_all_child_rdtgrp()
Backporting notes:
Since upstream commit fa7d949337cc ("x86/resctrl: Rename and move rdt
files to a separate directory"), the file
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c has been renamed and moved to
arch/x86/kernel/cpu/resctrl/rdtgroup.c.
Apply the change against file arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
in older stable trees.
Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system")
Suggested-by: Reinette Chatre <reinette.chatre(a)intel.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen(a)intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre(a)intel.com>
Reviewed-by: Tony Luck <tony.luck(a)intel.com>
Acked-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/1578500886-21771-2-git-send-email-xiaochen.shen@i…
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 2dae1b3..7349969 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1260,7 +1260,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
free_rmid(sentry->mon.rmid);
list_del(&sentry->mon.crdtgrp_list);
- kfree(sentry);
+
+ if (atomic_read(&sentry->waitcount) != 0)
+ sentry->flags = RDT_DELETED;
+ else
+ kfree(sentry);
}
}
@@ -1294,7 +1298,11 @@ static void rmdir_all_sub(void)
kernfs_remove(rdtgrp->kn);
list_del(&rdtgrp->rdtgroup_list);
- kfree(rdtgrp);
+
+ if (atomic_read(&rdtgrp->waitcount) != 0)
+ rdtgrp->flags = RDT_DELETED;
+ else
+ kfree(rdtgrp);
}
/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
update_closid_rmid(cpu_online_mask, &rdtgroup_default);
--
1.8.3.1
Please pull the following change since commit
68353984d63d8d7ea728819dbdb7aecc5f32d360:
Merge tag '5.6-smb3-fixes-and-dfs-and-readdir-improvements' of
git://git.samba.org/sfrench/cifs-2.6 (2020-01-28 15:34:03 -0800)
are available in the Git repository at:
git://git.samba.org/sfrench/cifs-2.6.git tags/5.6-rc-small-smb3-fix-for-stable
for you to fetch changes up to b581098482e6f177a4f64ea021fd5a9327ea08d5:
cifs: update internal module version number (2020-01-31 15:13:22 -0600)
----------------------------------------------------------------
Small SMB3 fix for stable (fixes problem with reconnect for soft mounts)
----------------------------------------------------------------
Ronnie Sahlberg (1):
cifs: fix soft mounts hanging in the reconnect code
Steve French (1):
cifs: update internal module version number
fs/cifs/cifsfs.h | 2 +-
fs/cifs/smb2pdu.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
--
Thanks,
Steve
In year 2007 high performance SSD was still expensive, in order to
save more space for real workload or meta data, the readahead I/Os
for non-meta data was bypassed and not cached on SSD.
In now days, SSD price drops a lot and people can find larger size
SSD with more comfortable price. It is unncessary to alway bypass
normal readahead I/Os to save SSD space for now.
This patch adds options for readahead data cache policies via sysfs
file /sys/block/bcache<N>/readahead_cache_policy, the options are,
- "all": cache all readahead data I/Os.
- "meta-only": only cache meta data, and bypass other regular I/Os.
If users want to make bcache continue to only cache readahead request
for metadata and bypass regular data readahead, please set "meta-only"
to this sysfs file. By default, bcache will back to cache all read-
ahead requests now.
Cc: stable(a)vger.kernel.org
Signed-off-by: Coly Li <colyli(a)suse.de>
Acked-by: Eric Wheeler <bcache(a)linux.ewheeler.net>
Cc: Michael Lyle <mlyle(a)lyle.org>
---
drivers/md/bcache/bcache.h | 3 +++
drivers/md/bcache/request.c | 17 ++++++++++++-----
drivers/md/bcache/sysfs.c | 22 ++++++++++++++++++++++
3 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index adf26a21fcd1..74a9849ea164 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -330,6 +330,9 @@ struct cached_dev {
*/
atomic_t has_dirty;
+#define BCH_CACHE_READA_ALL 0
+#define BCH_CACHE_READA_META_ONLY 1
+ unsigned int cache_readahead_policy;
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 73478a91a342..820d8402a1dc 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -379,13 +379,20 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
goto skip;
/*
- * Flag for bypass if the IO is for read-ahead or background,
- * unless the read-ahead request is for metadata
+ * If the bio is for read-ahead or background IO, bypass it or
+ * not depends on the following situations,
+ * - If the IO is for meta data, always cache it and no bypass
+ * - If the IO is not meta data, check dc->cache_reada_policy,
+ * BCH_CACHE_READA_ALL: cache it and not bypass
+ * BCH_CACHE_READA_META_ONLY: not cache it and bypass
+ * That is, read-ahead request for metadata always get cached
* (eg, for gfs2 or xfs).
*/
- if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
- !(bio->bi_opf & (REQ_META|REQ_PRIO)))
- goto skip;
+ if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
+ if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
+ (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
+ goto skip;
+ }
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 733e2ddf3c78..3470fae4eabc 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -27,6 +27,12 @@ static const char * const bch_cache_modes[] = {
NULL
};
+static const char * const bch_reada_cache_policies[] = {
+ "all",
+ "meta-only",
+ NULL
+};
+
/* Default is 0 ("auto") */
static const char * const bch_stop_on_failure_modes[] = {
"auto",
@@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
+rw_attribute(readahead_cache_policy);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
@@ -168,6 +175,11 @@ SHOW(__bch_cached_dev)
bch_cache_modes,
BDEV_CACHE_MODE(&dc->sb));
+ if (attr == &sysfs_readahead_cache_policy)
+ return bch_snprint_string_list(buf, PAGE_SIZE,
+ bch_reada_cache_policies,
+ dc->cache_readahead_policy);
+
if (attr == &sysfs_stop_when_cache_set_failed)
return bch_snprint_string_list(buf, PAGE_SIZE,
bch_stop_on_failure_modes,
@@ -353,6 +365,15 @@ STORE(__cached_dev)
}
}
+ if (attr == &sysfs_readahead_cache_policy) {
+ v = __sysfs_match_string(bch_reada_cache_policies, -1, buf);
+ if (v < 0)
+ return v;
+
+ if ((unsigned int) v != dc->cache_readahead_policy)
+ dc->cache_readahead_policy = v;
+ }
+
if (attr == &sysfs_stop_when_cache_set_failed) {
v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
if (v < 0)
@@ -467,6 +488,7 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_data_csum,
#endif
&sysfs_cache_mode,
+ &sysfs_readahead_cache_policy,
&sysfs_stop_when_cache_set_failed,
&sysfs_writeback_metadata,
&sysfs_writeback_running,
--
2.16.4
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: febac332a819f0e764aa4da62757ba21d18c182b
Gitweb: https://git.kernel.org/tip/febac332a819f0e764aa4da62757ba21d18c182b
Author: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
AuthorDate: Fri, 31 Jan 2020 19:08:59 +03:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Sat, 01 Feb 2020 11:07:56 +01:00
clocksource: Prevent double add_timer_on() for watchdog_timer
Kernel crashes inside QEMU/KVM are observed:
kernel BUG at kernel/time/timer.c:1154!
BUG_ON(timer_pending(timer) || !timer->function) in add_timer_on().
At the same time another cpu got:
general protection fault: 0000 [#1] SMP PTI of poinson pointer 0xdead000000000200 in:
__hlist_del at include/linux/list.h:681
(inlined by) detach_timer at kernel/time/timer.c:818
(inlined by) expire_timers at kernel/time/timer.c:1355
(inlined by) __run_timers at kernel/time/timer.c:1686
(inlined by) run_timer_softirq at kernel/time/timer.c:1699
Unfortunately kernel logs are badly scrambled, stacktraces are lost.
Printing the timer->function before the BUG_ON() pointed to
clocksource_watchdog().
The execution of clocksource_watchdog() can race with a sequence of
clocksource_stop_watchdog() .. clocksource_start_watchdog():
expire_timers()
detach_timer(timer, true);
timer->entry.pprev = NULL;
raw_spin_unlock_irq(&base->lock);
call_timer_fn
clocksource_watchdog()
clocksource_watchdog_kthread() or
clocksource_unbind()
spin_lock_irqsave(&watchdog_lock, flags);
clocksource_stop_watchdog();
del_timer(&watchdog_timer);
watchdog_running = 0;
spin_unlock_irqrestore(&watchdog_lock, flags);
spin_lock_irqsave(&watchdog_lock, flags);
clocksource_start_watchdog();
add_timer_on(&watchdog_timer, ...);
watchdog_running = 1;
spin_unlock_irqrestore(&watchdog_lock, flags);
spin_lock(&watchdog_lock);
add_timer_on(&watchdog_timer, ...);
BUG_ON(timer_pending(timer) || !timer->function);
timer_pending() -> true
BUG()
I.e. inside clocksource_watchdog() watchdog_timer could be already armed.
Check timer_pending() before calling add_timer_on(). This is sufficient as
all operations are synchronized by watchdog_lock.
Fixes: 75c5158f70c0 ("timekeeping: Update clocksource with stop_machine")
Signed-off-by: Konstantin Khlebnikov <khlebnikov(a)yandex-team.ru>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/158048693917.4378.13823603769948933793.stgit@buzz
---
kernel/time/clocksource.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index fff5f64..428beb6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -293,8 +293,15 @@ static void clocksource_watchdog(struct timer_list *unused)
next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
if (next_cpu >= nr_cpu_ids)
next_cpu = cpumask_first(cpu_online_mask);
- watchdog_timer.expires += WATCHDOG_INTERVAL;
- add_timer_on(&watchdog_timer, next_cpu);
+
+ /*
+ * Arm timer if not already pending: could race with concurrent
+ * pair clocksource_stop_watchdog() clocksource_start_watchdog().
+ */
+ if (!timer_pending(&watchdog_timer)) {
+ watchdog_timer.expires += WATCHDOG_INTERVAL;
+ add_timer_on(&watchdog_timer, next_cpu);
+ }
out:
spin_unlock(&watchdog_lock);
}
Hi,
On 1/31/20 6:17 PM, Z R wrote:
> Hi Benjamin,
> in last log touchpad.log (omg, should be keyboard.log), I pressed fn-f3 multiple times. I got one two liner:
>
> # ReportID: 3 / Wireless Radio Button: 0 | #
> E: 000007.606583 2 03 00
>
> every time i release f3. Does not matter what is happening with fn-key. (could be released already or still pushed down). This log was collected with last patch from Hans applied.
>
> Sorry for the mess I caused :-) I still don't get how you guys manage to have your emails so well polished and readable.
I don't think you've, caused a mess. Thank you both for the bug report and for your
help in testing this.
One last test request, can you run evemu-record on a kernel with my latest simplified patch,
select the keyboard device and then press (and release) the "rfkill" key and see it generates
a RF_KILL key press + release evdev event?
What would also be helpful is the output of:
ls -l /sys/bus/hid/devices/0003*/driver
Regards,
Hans
> pá 31. 1. 2020 v 18:00 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com <mailto:benjamin.tissoires@redhat.com>> napsal:
>
> On Fri, Jan 31, 2020 at 5:09 PM Z R <zdenda.rampas(a)gmail.com <mailto:zdenda.rampas@gmail.com>> wrote:
> >
> > I believe I pressed wifi button on both replays for keyboard.
>
> Yep, I can see that. Just to double check, on the last log, you
> pressed the wifi button twice?
>
> Anyway, thanks for all the logs, I should have enough to implement the
> regression tests.
>
> Cheers,
> Benjamin
>
> >
> > With latest patch from Hans on top of v5.5.0 touchpads "two finger scrolling" is working again. Attaching current hid-record for keyboard with Wifi button pressed. Events in log appeared after f3 button was "released".
> >
> > Thanks
> >
> > Zdeněk
> >
> > pá 31. 1. 2020 v 16:45 odesílatel Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> napsal:
> >>
> >> Hi,
> >>
> >> On 1/31/20 4:38 PM, Z R wrote:
> >> > Hi Benjamin,
> >> > hid-record for keyboard and touchpad. With Commit 8f18eca9ebc5 reverted and from unmodified kernel.
> >> >
> >> > I hope it is what you asked for :-)
> >> >
> >> > Currently waiting for reworked patch from Hans.
> >>
> >> I just send you the reworked patch.
> >>
> >> Does the recordning include pressing of the wlan on/off key (Fn + F3 I believe) ?
> >> That is the whole reason why the special hid-ite driver is necessary.
> >>
> >> Benjamin about the wlan on/off key. AFAICR on a press + release of the key a
> >> single hid input report for the generic-desktop Wireless Radio Controls group
> >> is send. This input-report only has the one button with usage code HID_GD_RFKILL_BTN
> >> in there and it is always 0. It is as if the input-report is only send on release
> >> and not on press. So the hid-ite code emulates a press + release whenever the
> >> input-report is send.
> >>
> >> IOW the receiving of the input report is (ab)used as indication of the button
> >> having been pressed.
> >>
> >> Regards,
> >>
> >> Hans
> >>
> >>
> >> >
> >> > Bye for now
> >> > Zdeněk
> >> >
> >> > pá 31. 1. 2020 v 15:12 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com <mailto:benjamin.tissoires@redhat.com> <mailto:benjamin.tissoires@redhat.com <mailto:benjamin.tissoires@redhat.com>>> napsal:
> >> >
> >> > On Fri, Jan 31, 2020 at 3:04 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com> <mailto:hdegoede@redhat.com <mailto:hdegoede@redhat.com>>> wrote:
> >> > >
> >> > > Hi,
> >> > >
> >> > > On 1/31/20 2:54 PM, Benjamin Tissoires wrote:
> >> > > > On Fri, Jan 31, 2020 at 2:41 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com> <mailto:hdegoede@redhat.com <mailto:hdegoede@redhat.com>>> wrote:
> >> > > >>
> >> > > >> Hi,
> >> > > >>
> >> > > >> On 1/31/20 2:10 PM, Benjamin Tissoires wrote:
> >> > > >>> Hi Hans,
> >> > > >>>
> >> > > >>> On Fri, Jan 31, 2020 at 1:46 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com> <mailto:hdegoede@redhat.com <mailto:hdegoede@redhat.com>>> wrote:
> >> > > >>>>
> >> > > >>>> Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
> >> > > >>>> dock") added the USB id for the Acer SW5-012's keyboard dock to the
> >> > > >>>> hid-ite driver to fix the rfkill driver not working.
> >> > > >>>>
> >> > > >>>> Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
> >> > > >>>> "Wireless Radio Control" bits which need the special hid-ite driver on the
> >> > > >>>> second USB interface (the mouse interface) and their touchpad only supports
> >> > > >>>> mouse emulation, so using generic hid-input handling for anything but
> >> > > >>>> the "Wireless Radio Control" bits is fine. On these devices we simply bind
> >> > > >>>> to all USB interfaces.
> >> > > >>>>
> >> > > >>>> But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
> >> > > >>>> (SW5-012)'s touchpad not only does mouse emulation it also supports
> >> > > >>>> HID-multitouch and all the keys including the "Wireless Radio Control"
> >> > > >>>> bits have been moved to the first USB interface (the keyboard intf).
> >> > > >>>>
> >> > > >>>> So we need hid-ite to handle the first (keyboard) USB interface and have
> >> > > >>>> it NOT bind to the second (mouse) USB interface so that that can be
> >> > > >>>> handled by hid-multitouch.c and we get proper multi-touch support.
> >> > > >>>>
> >> > > >>>> This commit adds a match callback to hid-ite which makes it only
> >> > > >>>> match the first USB interface when running on the Acer SW5-012,
> >> > > >>>> fixing the regression to mouse-emulation mode introduced by adding the
> >> > > >>>> keyboard dock USB id.
> >> > > >>>>
> >> > > >>>> Note the match function only does the special only bind to the first
> >> > > >>>> USB interface on the Acer SW5-012, on other devices the hid-ite driver
> >> > > >>>> actually must bind to the second interface as that is where the
> >> > > >>>> "Wireless Radio Control" bits are.
> >> > > >>>
> >> > > >>> This is not a full review, but a couple of things that popped out
> >> > > >>> while scrolling through the patch.
> >> > > >>>
> >> > > >>>>
> >> > > >>>> Cc: stable(a)vger.kernel.org <mailto:stable@vger.kernel.org> <mailto:stable@vger.kernel.org <mailto:stable@vger.kernel.org>>
> >> > > >>>> Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
> >> > > >>>> Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com <mailto:zdenda.rampas@gmail.com> <mailto:zdenda.rampas@gmail.com <mailto:zdenda.rampas@gmail.com>>>
> >> > > >>>> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com> <mailto:hdegoede@redhat.com <mailto:hdegoede@redhat.com>>>
> >> > > >>>> ---
> >> > > >>>> drivers/hid/hid-ite.c | 34 ++++++++++++++++++++++++++++++++++
> >> > > >>>> 1 file changed, 34 insertions(+)
> >> > > >>>>
> >> > > >>>> diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
> >> > > >>>> index c436e12feb23..69a4ddfd033d 100644
> >> > > >>>> --- a/drivers/hid/hid-ite.c
> >> > > >>>> +++ b/drivers/hid/hid-ite.c
> >> > > >>>> @@ -8,9 +8,12 @@
> >> > > >>>> #include <linux/input.h>
> >> > > >>>> #include <linux/hid.h>
> >> > > >>>> #include <linux/module.h>
> >> > > >>>> +#include <linux/usb.h>
> >> > > >>>>
> >> > > >>>> #include "hid-ids.h"
> >> > > >>>>
> >> > > >>>> +#define ITE8595_KBD_USB_INTF 0
> >> > > >>>> +
> >> > > >>>> static int ite_event(struct hid_device *hdev, struct hid_field *field,
> >> > > >>>> struct hid_usage *usage, __s32 value)
> >> > > >>>> {
> >> > > >>>> @@ -37,6 +40,36 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
> >> > > >>>> return 0;
> >> > > >>>> }
> >> > > >>>>
> >> > > >>>> +static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
> >> > > >>>> +{
> >> > > >>>> + struct usb_interface *intf;
> >> > > >>>> +
> >> > > >>>> + if (ignore_special_driver)
> >> > > >>>> + return false;
> >> > > >>>> +
> >> > > >>>> + /*
> >> > > >>>> + * Most keyboard docks with an ITE 8595 keyboard/touchpad controller
> >> > > >>>> + * have the "Wireless Radio Control" bits which need this special
> >> > > >>>> + * driver on the second USB interface (the mouse interface). On
> >> > > >>>> + * these devices we simply bind to all USB interfaces.
> >> > > >>>> + *
> >> > > >>>> + * The Acer Aspire Switch 10 (SW5-012) is special, its touchpad
> >> > > >>>> + * not only does mouse emulation it also supports HID-multitouch
> >> > > >>>> + * and all the keys including the "Wireless Radio Control" bits
> >> > > >>>> + * have been moved to the first USB interface (the keyboard intf).
> >> > > >>>> + *
> >> > > >>>> + * We want the hid-multitouch driver to bind to the touchpad, so on
> >> > > >>>> + * the Acer SW5-012 we should only bind to the keyboard USB intf.
> >> > > >>>> + */
> >> > > >>>> + if (hdev->bus != BUS_USB || hdev->vendor != USB_VENDOR_ID_SYNAPTICS ||
> >> > > >>>> + hdev->product != USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012)
> >> > > >>>
> >> > > >>> Isn't there an existing matching function we can use here, instead of
> >> > > >>> checking each individual field?
> >> > > >>
> >> > > >> There is hid_match_one_id() but that is not exported (can be fixed) and it
> >> > > >> requires a struct hid_device_id, which either requires declaring an extra
> >> > > >> standalone struct hid_device_id for the SW5-012 kbd-dock, or hardcoding an
> >> > > >> index into the existing hid_device_id array for the driver (with the hardcoding
> >> > > >> being error prone, so not a good idea).
> >> > > >>
> >> > > >> Given the problems with using hid_match_one_id() I decided to just go with
> >> > > >> the above.
> >> > > >
> >> > > > right. An other solution would be to have a local macro/function that
> >> > > > does that. Because as soon as you start adding a quirk, an other comes
> >> > > > right after.
> >> > > >
> >> > > >>
> >> > > >> But see below.
> >> > > >>
> >> > > >>>
> >> > > >>>> + return true;
> >> > > >>>> +
> >> > > >>>> + intf = to_usb_interface(hdev->dev.parent);
> >> > > >>>
> >> > > >>> And this is oops-prone. You need:
> >> > > >>> - ensure hid_is_using_ll_driver(hdev, &usb_hid_driver) returns true.
> >> > > >>> - add a dependency on USBHID in the KConfig now that you are checking
> >> > > >>> on the USB transport layer.
> >> > > >>>
> >> > > >>> That being said, I would love instead:
> >> > > >>> - to have a non USB version of this match, where you decide which
> >> > > >>> component needs to be handled based on the report descriptor
> >> > > >>
> >> > > >> Actually your idea to use the desciptors is not bad, but since what
> >> > > >> we really want is to not bind to the interface which is marked for the
> >> > > >> hid-multitouch driver I just realized we can just check that.
> >> > > >>
> >> > > >> So how about:
> >> > > >>
> >> > > >> static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
> >> > > >> {
> >> > > >> if (ignore_special_driver)
> >> > > >> return false;
> >> > > >>
> >> > > >> /*
> >> > > >> * Some keyboard docks with an ITE 8595 keyboard/touchpad controller
> >> > > >> * support the HID multitouch protocol for the touchpad, in that
> >> > > >> * case the "Wireless Radio Control" bits which we care about are
> >> > > >> * on the other interface; and we should not bind to the multitouch
> >> > > >> * capable interface as that breaks multitouch support.
> >> > > >> */
> >> > > >> return hdev->group != HID_GROUP_MULTITOUCH_WIN_8;
> >> > > >> }
> >> > > >
> >> > > > Yep, I like that very much :)
> >> > >
> >> > > Actually if we want to check the group and there are only 2 interfaces we do
> >> > > not need to use the match callback at all, w e can simply match on the
> >> > > group of the interface which we do want:
> >> > >
> >> > > diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
> >> > > index db0f35be5a8b..21bd48f16033 100644
> >> > > --- a/drivers/hid/hid-ite.c
> >> > > +++ b/drivers/hid/hid-ite.c
> >> > > @@ -56,8 +56,9 @@ static const struct hid_device_id ite_devices[] = {
> >> > > { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
> >> > > { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
> >> > > /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
> >> > > - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
> >> > > - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
> >> > > + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
> >> > > + USB_VENDOR_ID_SYNAPTICS,
> >> > > + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
> >> > > { }
> >> > > };
> >> > > MODULE_DEVICE_TABLE(hid, ite_devices);
> >> > >
> >> > > Much cleaner
> >> >
> >> > yep
> >> >
> >> > > (and now I don't need to write a test, which is always
> >> > > a good motivation to come up with a cleaner solution :)
> >> >
> >> > Hehe, too bad, you already picked up my curiosity on this one, and I
> >> > really would like to see the report descriptors and some events of the
> >> > keys that are fixed by hid-ite.c.
> >> > <with a low voice>This will be a hard requirement to accept this patch </joke>.
> >> >
> >> > More seriously, Zdeněk, can you run hid-recorder from
> >> > https://gitlab.freedesktop.org/libevdev/hid-tools/ and provide me the
> >> > report descriptor for all of your ITE HID devices? I'll add the
> >> > matching tests in hid-tools and be sure we do not regress in the
> >> > future.
> >> >
> >> > >
> >> > > Let me turn this into a proper patch and then I will send that to
> >> > > Zdeněk (off-list) for him to test (note don't worry if you do
> >> > > not have time to test this weekend, then I'll do it on Monday).
> >> > >
> >> > > Regards,
> >> > >
> >> > > Hans
> >> > >
> >> > > p.s.
> >> > >
> >> > > 1. My train is approaching Brussels (Fosdem) so my email response
> >> > > time will soon become irregular.
> >> >
> >> > How dare you? :)
> >> >
> >> > >
> >> > > 2. Benjamin will you be at Fosdem too ?
> >> > >
> >> >
> >> > Unfortunately no. Already got my quota of meeting people for this year
> >> > between Kernel Recipes in September, XDC in October and LCA last week.
> >> > So I need to keep in a quiet environment for a little bit :)
> >> >
> >> > Cheers,
> >> > Benjamin
> >> >
> >>
>
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 6f1a4891a5928a5969c87fa5a584844c983ec823
Gitweb: https://git.kernel.org/tip/6f1a4891a5928a5969c87fa5a584844c983ec823
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Fri, 31 Jan 2020 15:26:52 +01:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Sat, 01 Feb 2020 09:31:47 +01:00
x86/apic/msi: Plug non-maskable MSI affinity race
Evan tracked down a subtle race between the update of the MSI message and
the device raising an interrupt internally on PCI devices which do not
support MSI masking. The update of the MSI message is non-atomic and
consists of either 2 or 3 sequential 32bit wide writes to the PCI config
space.
- Write address low 32bits
- Write address high 32bits (If supported by device)
- Write data
When an interrupt is migrated then both address and data might change, so
the kernel attempts to mask the MSI interrupt first. But for MSI masking is
optional, so there exist devices which do not provide it. That means that
if the device raises an interrupt internally between the writes then a MSI
message is sent built from half updated state.
On x86 this can lead to spurious interrupts on the wrong interrupt
vector when the affinity setting changes both address and data. As a
consequence the device interrupt can be lost causing the device to
become stuck or malfunctioning.
Evan tried to handle that by disabling MSI accross an MSI message
update. That's not feasible because disabling MSI has issues on its own:
If MSI is disabled the PCI device is routing an interrupt to the legacy
INTx mechanism. The INTx delivery can be disabled, but the disablement is
not working on all devices.
Some devices lose interrupts when both MSI and INTx delivery are disabled.
Another way to solve this would be to enforce the allocation of the same
vector on all CPUs in the system for this kind of screwed devices. That
could be done, but it would bring back the vector space exhaustion problems
which got solved a few years ago.
Fortunately the high address (if supported by the device) is only relevant
when X2APIC is enabled which implies interrupt remapping. In the interrupt
remapping case the affinity setting is happening at the interrupt remapping
unit and the PCI MSI message is programmed only once when the PCI device is
initialized.
That makes it possible to solve it with a two step update:
1) Target the MSI msg to the new vector on the current target CPU
2) Target the MSI msg to the new vector on the new target CPU
In both cases writing the MSI message is only changing a single 32bit word
which prevents the issue of inconsistency.
After writing the final destination it is necessary to check whether the
device issued an interrupt while the intermediate state #1 (new vector,
current CPU) was in effect.
This is possible because the affinity change is always happening on the
current target CPU. The code runs with interrupts disabled, so the
interrupt can be detected by checking the IRR of the local APIC. If the
vector is pending in the IRR then the interrupt is retriggered on the new
target CPU by sending an IPI for the associated vector on the target CPU.
This can cause spurious interrupts on both the local and the new target
CPU.
1) If the new vector is not in use on the local CPU and the device
affected by the affinity change raised an interrupt during the
transitional state (step #1 above) then interrupt entry code will
ignore that spurious interrupt. The vector is marked so that the
'No irq handler for vector' warning is supressed once.
2) If the new vector is in use already on the local CPU then the IRR check
might see an pending interrupt from the device which is using this
vector. The IPI to the new target CPU will then invoke the handler of
the device, which got the affinity change, even if that device did not
issue an interrupt
3) If the new vector is in use already on the local CPU and the device
affected by the affinity change raised an interrupt during the
transitional state (step #1 above) then the handler of the device which
uses that vector on the local CPU will be invoked.
expose issues in device driver interrupt handlers which are not prepared to
handle a spurious interrupt correctly. This not a regression, it's just
exposing something which was already broken as spurious interrupts can
happen for a lot of reasons and all driver handlers need to be able to deal
with them.
Reported-by: Evan Green <evgreen(a)chromium.org>
Debugged-by: Evan Green <evgreen(a)chromium.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Evan Green <evgreen(a)chromium.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/87imkr4s7n.fsf@nanos.tec.linutronix.de
---
arch/x86/include/asm/apic.h | 8 ++-
arch/x86/kernel/apic/msi.c | 128 ++++++++++++++++++++++++++++++++++-
include/linux/irq.h | 18 +++++-
include/linux/irqdomain.h | 7 ++-
kernel/irq/debugfs.c | 1 +-
kernel/irq/msi.c | 5 +-
6 files changed, 163 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index be0b9cf..19e94af 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -454,6 +454,14 @@ static inline void ack_APIC_irq(void)
apic_eoi();
}
+
+static inline bool lapic_vector_set_in_irr(unsigned int vector)
+{
+ u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+
+ return !!(irr & (1U << (vector % 32)));
+}
+
static inline unsigned default_get_apic_id(unsigned long x)
{
unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 7f75334..159bd0c 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -23,10 +23,8 @@
static struct irq_domain *msi_default_domain;
-static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
{
- struct irq_cfg *cfg = irqd_cfg(data);
-
msg->address_hi = MSI_ADDR_BASE_HI;
if (x2apic_enabled())
@@ -47,6 +45,127 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
MSI_DATA_VECTOR(cfg->vector);
}
+static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ __irq_msi_compose_msg(irqd_cfg(data), msg);
+}
+
+static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg)
+{
+ struct msi_msg msg[2] = { [1] = { }, };
+
+ __irq_msi_compose_msg(cfg, msg);
+ irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg);
+}
+
+static int
+msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
+{
+ struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd);
+ struct irq_data *parent = irqd->parent_data;
+ unsigned int cpu;
+ int ret;
+
+ /* Save the current configuration */
+ cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));
+ old_cfg = *cfg;
+
+ /* Allocate a new target vector */
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;
+
+ /*
+ * For non-maskable and non-remapped MSI interrupts the migration
+ * to a different destination CPU and a different vector has to be
+ * done careful to handle the possible stray interrupt which can be
+ * caused by the non-atomic update of the address/data pair.
+ *
+ * Direct update is possible when:
+ * - The MSI is maskable (remapped MSI does not use this code path)).
+ * The quirk bit is not set in this case.
+ * - The new vector is the same as the old vector
+ * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
+ * - The new destination CPU is the same as the old destination CPU
+ */
+ if (!irqd_msi_nomask_quirk(irqd) ||
+ cfg->vector == old_cfg.vector ||
+ old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
+ cfg->dest_apicid == old_cfg.dest_apicid) {
+ irq_msi_update_msg(irqd, cfg);
+ return ret;
+ }
+
+ /*
+ * Paranoia: Validate that the interrupt target is the local
+ * CPU.
+ */
+ if (WARN_ON_ONCE(cpu != smp_processor_id())) {
+ irq_msi_update_msg(irqd, cfg);
+ return ret;
+ }
+
+ /*
+ * Redirect the interrupt to the new vector on the current CPU
+ * first. This might cause a spurious interrupt on this vector if
+ * the device raises an interrupt right between this update and the
+ * update to the final destination CPU.
+ *
+ * If the vector is in use then the installed device handler will
+ * denote it as spurious which is no harm as this is a rare event
+ * and interrupt handlers have to cope with spurious interrupts
+ * anyway. If the vector is unused, then it is marked so it won't
+ * trigger the 'No irq handler for vector' warning in do_IRQ().
+ *
+ * This requires to hold vector lock to prevent concurrent updates to
+ * the affected vector.
+ */
+ lock_vector_lock();
+
+ /*
+ * Mark the new target vector on the local CPU if it is currently
+ * unused. Reuse the VECTOR_RETRIGGERED state which is also used in
+ * the CPU hotplug path for a similar purpose. This cannot be
+ * undone here as the current CPU has interrupts disabled and
+ * cannot handle the interrupt before the whole set_affinity()
+ * section is done. In the CPU unplug case, the current CPU is
+ * about to vanish and will not handle any interrupts anymore. The
+ * vector is cleaned up when the CPU comes online again.
+ */
+ if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector])))
+ this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED);
+
+ /* Redirect it to the new vector on the local CPU temporarily */
+ old_cfg.vector = cfg->vector;
+ irq_msi_update_msg(irqd, &old_cfg);
+
+ /* Now transition it to the target CPU */
+ irq_msi_update_msg(irqd, cfg);
+
+ /*
+ * All interrupts after this point are now targeted at the new
+ * vector/CPU.
+ *
+ * Drop vector lock before testing whether the temporary assignment
+ * to the local CPU was hit by an interrupt raised in the device,
+ * because the retrigger function acquires vector lock again.
+ */
+ unlock_vector_lock();
+
+ /*
+ * Check whether the transition raced with a device interrupt and
+ * is pending in the local APICs IRR. It is safe to do this outside
+ * of vector lock as the irq_desc::lock of this interrupt is still
+ * held and interrupts are disabled: The check is not accessing the
+ * underlying vector store. It's just checking the local APIC's
+ * IRR.
+ */
+ if (lapic_vector_set_in_irr(cfg->vector))
+ irq_data_get_irq_chip(irqd)->irq_retrigger(irqd);
+
+ return ret;
+}
+
/*
* IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
* which implement the MSI or MSI-X Capability Structure.
@@ -58,6 +177,7 @@ static struct irq_chip pci_msi_controller = {
.irq_ack = irq_chip_ack_parent,
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = irq_msi_compose_msg,
+ .irq_set_affinity = msi_set_affinity,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
@@ -146,6 +266,8 @@ void __init arch_init_msi_domain(struct irq_domain *parent)
}
if (!msi_default_domain)
pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
+ else
+ msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
}
#ifdef CONFIG_IRQ_REMAP
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7853eb9..3ed5a05 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -209,6 +209,8 @@ struct irq_data {
* IRQD_SINGLE_TARGET - IRQ allows only a single affinity target
* IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set
* IRQD_CAN_RESERVE - Can use reservation mode
+ * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change
+ * required
*/
enum {
IRQD_TRIGGER_MASK = 0xf,
@@ -231,6 +233,7 @@ enum {
IRQD_SINGLE_TARGET = (1 << 24),
IRQD_DEFAULT_TRIGGER_SET = (1 << 25),
IRQD_CAN_RESERVE = (1 << 26),
+ IRQD_MSI_NOMASK_QUIRK = (1 << 27),
};
#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -390,6 +393,21 @@ static inline bool irqd_can_reserve(struct irq_data *d)
return __irqd_to_state(d) & IRQD_CAN_RESERVE;
}
+static inline void irqd_set_msi_nomask_quirk(struct irq_data *d)
+{
+ __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK;
+}
+
+static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d)
+{
+ __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK;
+}
+
+static inline bool irqd_msi_nomask_quirk(struct irq_data *d)
+{
+ return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK;
+}
+
#undef __irqd_to_state
static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 3c340db..4da8df5 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -207,6 +207,13 @@ enum {
IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5),
/*
+ * Quirk to handle MSI implementations which do not provide
+ * masking. Currently known to affect x86, but partially
+ * handled in core code.
+ */
+ IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6),
+
+ /*
* Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
* for implementation specific purposes and ignored by the
* core code.
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index c1eccd4..a949bd3 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -114,6 +114,7 @@ static const struct irq_bit_descr irqdata_states[] = {
BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
BIT_MASK_DESCR(IRQD_CAN_RESERVE),
+ BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK),
BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index ad26fbc..eb95f61 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -453,8 +453,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
continue;
irq_data = irq_domain_get_irq_data(domain, desc->irq);
- if (!can_reserve)
+ if (!can_reserve) {
irqd_clr_can_reserve(irq_data);
+ if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
+ irqd_set_msi_nomask_quirk(irq_data);
+ }
ret = irq_domain_activate_irq(irq_data, can_reserve);
if (ret)
goto cleanup;
The patch titled
Subject: lib/test_kasan.c: fix memory leak in kmalloc_oob_krealloc_more()
has been removed from the -mm tree. Its filename was
lib-test_kasanc-fix-memory-leak-in-kmalloc_oob_krealloc_more.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: "Gustavo A. R. Silva" <gustavo(a)embeddedor.com>
Subject: lib/test_kasan.c: fix memory leak in kmalloc_oob_krealloc_more()
In case memory resources for _ptr2_ were allocated, release them before
return.
Notice that in case _ptr1_ happens to be NULL, krealloc() behaves exactly
like kmalloc().
Addresses-Coverity-ID: 1490594 ("Resource leak")
Link: http://lkml.kernel.org/r/20200123160115.GA4202@embeddedor
Fixes: 3f15801cdc23 ("lib: add kasan test module")
Signed-off-by: Gustavo A. R. Silva <gustavo(a)embeddedor.com>
Reviewed-by: Dmitry Vyukov <dvyukov(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/test_kasan.c | 1 +
1 file changed, 1 insertion(+)
--- a/lib/test_kasan.c~lib-test_kasanc-fix-memory-leak-in-kmalloc_oob_krealloc_more
+++ a/lib/test_kasan.c
@@ -158,6 +158,7 @@ static noinline void __init kmalloc_oob_
if (!ptr1 || !ptr2) {
pr_err("Allocation failed\n");
kfree(ptr1);
+ kfree(ptr2);
return;
}
_
Patches currently in -mm which might be from gustavo(a)embeddedor.com are
The patch titled
Subject: media/v4l2-core: set pages dirty upon releasing DMA buffers
has been removed from the -mm tree. Its filename was
media-v4l2-core-set-pages-dirty-upon-releasing-dma-buffers.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: John Hubbard <jhubbard(a)nvidia.com>
Subject: media/v4l2-core: set pages dirty upon releasing DMA buffers
After DMA is complete, and the device and CPU caches are synchronized,
it's still required to mark the CPU pages as dirty, if the data was coming
from the device. However, this driver was just issuing a bare put_page()
call, without any set_page_dirty*() call.
Fix the problem, by calling set_page_dirty_lock() if the CPU pages were
potentially receiving data from the device.
Link: http://lkml.kernel.org/r/20200107224558.2362728-11-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard(a)nvidia.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Acked-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Cc: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Cc: Alex Williamson <alex.williamson(a)redhat.com>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Cc: Björn Töpel <bjorn.topel(a)intel.com>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jason Gunthorpe <jgg(a)mellanox.com>
Cc: Jason Gunthorpe <jgg(a)ziepe.ca>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Kirill A. Shutemov <kirill(a)shutemov.name>
Cc: Leon Romanovsky <leonro(a)mellanox.com>
Cc: Mike Rapoport <rppt(a)linux.ibm.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/media/v4l2-core/videobuf-dma-sg.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c~media-v4l2-core-set-pages-dirty-upon-releasing-dma-buffers
+++ a/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -349,8 +349,11 @@ int videobuf_dma_free(struct videobuf_dm
BUG_ON(dma->sglen);
if (dma->pages) {
- for (i = 0; i < dma->nr_pages; i++)
+ for (i = 0; i < dma->nr_pages; i++) {
+ if (dma->direction == DMA_FROM_DEVICE)
+ set_page_dirty_lock(dma->pages[i]);
put_page(dma->pages[i]);
+ }
kfree(dma->pages);
dma->pages = NULL;
}
_
Patches currently in -mm which might be from jhubbard(a)nvidia.com are
The patch titled
Subject: mm: move_pages: report the number of non-attempted pages
has been removed from the -mm tree. Its filename was
mm-move_pages-report-the-number-of-non-attempted-pages.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Yang Shi <yang.shi(a)linux.alibaba.com>
Subject: mm: move_pages: report the number of non-attempted pages
Since commit a49bd4d71637 ("mm, numa: rework do_pages_move"), the semantic
of move_pages() has changed to return the number of non-migrated pages if
they were result of a non-fatal reasons (usually a busy page). This was
an unintentional change that hasn't been noticed except for LTP tests
which checked for the documented behavior.
There are two ways to go around this change. We can even get back to the
original behavior and return -EAGAIN whenever migrate_pages is not able to
migrate pages due to non-fatal reasons. Another option would be to simply
continue with the changed semantic and extend move_pages documentation to
clarify that -errno is returned on an invalid input or when migration
simply cannot succeed (e.g. -ENOMEM, -EBUSY) or the number of pages that
couldn't have been migrated due to ephemeral reasons (e.g. page is pinned
or locked for other reasons).
This patch implements the second option because this behavior is in place
for some time without anybody complaining and possibly new users depending
on it. Also it allows to have a slightly easier error handling as the
caller knows that it is worth to retry when err > 0.
But since the new semantic would be aborted immediately if migration is
failed due to ephemeral reasons, need include the number of non-attempted
pages in the return value too.
Link: http://lkml.kernel.org/r/1580160527-109104-1-git-send-email-yang.shi@linux.…
Fixes: a49bd4d71637 ("mm, numa: rework do_pages_move")
Signed-off-by: Yang Shi <yang.shi(a)linux.alibaba.com>
Suggested-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Wei Yang <richardw.yang(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> [4.17+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/migrate.c | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
--- a/mm/migrate.c~mm-move_pages-report-the-number-of-non-attempted-pages
+++ a/mm/migrate.c
@@ -1627,8 +1627,19 @@ static int do_pages_move(struct mm_struc
start = i;
} else if (node != current_node) {
err = do_move_pages_to_node(mm, &pagelist, current_node);
- if (err)
+ if (err) {
+ /*
+ * Positive err means the number of failed
+ * pages to migrate. Since we are going to
+ * abort and return the number of non-migrated
+ * pages, so need to incude the rest of the
+ * nr_pages that have not been attempted as
+ * well.
+ */
+ if (err > 0)
+ err += nr_pages - i - 1;
goto out;
+ }
err = store_status(status, start, current_node, i - start);
if (err)
goto out;
@@ -1659,8 +1670,11 @@ static int do_pages_move(struct mm_struc
goto out_flush;
err = do_move_pages_to_node(mm, &pagelist, current_node);
- if (err)
+ if (err) {
+ if (err > 0)
+ err += nr_pages - i - 1;
goto out;
+ }
if (i > start) {
err = store_status(status, start, current_node, i - start);
if (err)
@@ -1674,6 +1688,13 @@ out_flush:
/* Make sure we do not overwrite the existing error */
err1 = do_move_pages_to_node(mm, &pagelist, current_node);
+ /*
+ * Don't have to report non-attempted pages here since:
+ * - If the above loop is done gracefully all pages have been
+ * attempted.
+ * - If the above loop is aborted it means a fatal error
+ * happened, should return ret.
+ */
if (!err1)
err1 = store_status(status, start, current_node, i - start);
if (err >= 0)
_
Patches currently in -mm which might be from yang.shi(a)linux.alibaba.com are
The patch titled
Subject: mm: thp: don't need care deferred split queue in memcg charge move path
has been removed from the -mm tree. Its filename was
mm-thp-remove-the-defer-list-related-code-since-this-will-not-happen.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Wei Yang <richardw.yang(a)linux.intel.com>
Subject: mm: thp: don't need care deferred split queue in memcg charge move path
If compound is true, this means it is a PMD mapped THP. Which implies
the page is not linked to any defer list. So the first code chunk will
not be executed.
Also with this reason, it would not be proper to add this page to a
defer list. So the second code chunk is not correct.
Based on this, we should remove the defer list related code.
[yang.shi(a)linux.alibaba.com: better patch title]
Link: http://lkml.kernel.org/r/20200117233836.3434-1-richardw.yang@linux.intel.com
Fixes: 87eaceb3faa5 ("mm: thp: make deferred split shrinker memcg aware")
Signed-off-by: Wei Yang <richardw.yang(a)linux.intel.com>
Suggested-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Acked-by: Yang Shi <yang.shi(a)linux.alibaba.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 18 ------------------
1 file changed, 18 deletions(-)
--- a/mm/memcontrol.c~mm-thp-remove-the-defer-list-related-code-since-this-will-not-happen
+++ a/mm/memcontrol.c
@@ -5340,14 +5340,6 @@ static int mem_cgroup_move_account(struc
__mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (compound && !list_empty(page_deferred_list(page))) {
- spin_lock(&from->deferred_split_queue.split_queue_lock);
- list_del_init(page_deferred_list(page));
- from->deferred_split_queue.split_queue_len--;
- spin_unlock(&from->deferred_split_queue.split_queue_lock);
- }
-#endif
/*
* It is safe to change page->mem_cgroup here because the page
* is referenced, charged, and isolated - we can't race with
@@ -5357,16 +5349,6 @@ static int mem_cgroup_move_account(struc
/* caller should have done css_get */
page->mem_cgroup = to;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (compound && list_empty(page_deferred_list(page))) {
- spin_lock(&to->deferred_split_queue.split_queue_lock);
- list_add_tail(page_deferred_list(page),
- &to->deferred_split_queue.split_queue);
- to->deferred_split_queue.split_queue_len++;
- spin_unlock(&to->deferred_split_queue.split_queue_lock);
- }
-#endif
-
spin_unlock_irqrestore(&from->move_lock, flags);
ret = 0;
_
Patches currently in -mm which might be from richardw.yang(a)linux.intel.com are
The patch titled
Subject: mm/memory_hotplug: fix remove_memory() lockdep splat
has been removed from the -mm tree. Its filename was
mm-memory_hotplug-fix-remove_memory-lockdep-splat.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Dan Williams <dan.j.williams(a)intel.com>
Subject: mm/memory_hotplug: fix remove_memory() lockdep splat
The daxctl unit test for the dax_kmem driver currently triggers the (false
positive) lockdep splat below. It results from the fact that
remove_memory_block_devices() is invoked under the mem_hotplug_lock()
causing lockdep entanglements with cpu_hotplug_lock() and sysfs (kernfs
active state tracking). It is a false positive because the sysfs
attribute path triggering the memory remove is not the same attribute path
associated with memory-block device.
sysfs_break_active_protection() is not applicable since there is no real
deadlock conflict, instead move memory-block device removal outside the
lock. The mem_hotplug_lock() is not needed to synchronize the
memory-block device removal vs the page online state, that is already
handled by lock_device_hotplug(). Specifically, lock_device_hotplug() is
sufficient to allow try_remove_memory() to check the offline state of the
memblocks and be assured that any in progress online attempts are flushed
/ blocked by kernfs_drain() / attribute removal.
The add_memory() path safely creates memblock devices under the
mem_hotplug_lock(). There is no kernfs active state synchronization in
the memblock device_register() path, so nothing to fix there.
This change is only possible thanks to the recent change that refactored
memory block device removal out of arch_remove_memory() (commit
4c4b7f9ba948 mm/memory_hotplug: remove memory block devices before
arch_remove_memory()), and David's due diligence tracking down the
guarantees afforded by kernfs_drain(). Not flagged for -stable since this
only impacts ongoing development and lockdep validation, not a runtime
issue.
======================================================
WARNING: possible circular locking dependency detected
5.5.0-rc3+ #230 Tainted: G OE
------------------------------------------------------
lt-daxctl/6459 is trying to acquire lock:
ffff99c7f0003510 (kn->count#241){++++}, at: kernfs_remove_by_name_ns+0x41/0x80
but task is already holding lock:
ffffffffa76a5450 (mem_hotplug_lock.rw_sem){++++}, at: percpu_down_write+0x20/0xe0
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (mem_hotplug_lock.rw_sem){++++}:
__lock_acquire+0x39c/0x790
lock_acquire+0xa2/0x1b0
get_online_mems+0x3e/0xb0
kmem_cache_create_usercopy+0x2e/0x260
kmem_cache_create+0x12/0x20
ptlock_cache_init+0x20/0x28
start_kernel+0x243/0x547
secondary_startup_64+0xb6/0xc0
-> #1 (cpu_hotplug_lock.rw_sem){++++}:
__lock_acquire+0x39c/0x790
lock_acquire+0xa2/0x1b0
cpus_read_lock+0x3e/0xb0
online_pages+0x37/0x300
memory_subsys_online+0x17d/0x1c0
device_online+0x60/0x80
state_store+0x65/0xd0
kernfs_fop_write+0xcf/0x1c0
vfs_write+0xdb/0x1d0
ksys_write+0x65/0xe0
do_syscall_64+0x5c/0xa0
entry_SYSCALL_64_after_hwframe+0x49/0xbe
-> #0 (kn->count#241){++++}:
check_prev_add+0x98/0xa40
validate_chain+0x576/0x860
__lock_acquire+0x39c/0x790
lock_acquire+0xa2/0x1b0
__kernfs_remove+0x25f/0x2e0
kernfs_remove_by_name_ns+0x41/0x80
remove_files.isra.0+0x30/0x70
sysfs_remove_group+0x3d/0x80
sysfs_remove_groups+0x29/0x40
device_remove_attrs+0x39/0x70
device_del+0x16a/0x3f0
device_unregister+0x16/0x60
remove_memory_block_devices+0x82/0xb0
try_remove_memory+0xb5/0x130
remove_memory+0x26/0x40
dev_dax_kmem_remove+0x44/0x6a [kmem]
device_release_driver_internal+0xe4/0x1c0
unbind_store+0xef/0x120
kernfs_fop_write+0xcf/0x1c0
vfs_write+0xdb/0x1d0
ksys_write+0x65/0xe0
do_syscall_64+0x5c/0xa0
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Chain exists of:
kn->count#241 --> cpu_hotplug_lock.rw_sem --> mem_hotplug_lock.rw_sem
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(mem_hotplug_lock.rw_sem);
lock(cpu_hotplug_lock.rw_sem);
lock(mem_hotplug_lock.rw_sem);
lock(kn->count#241);
*** DEADLOCK ***
No fixes tag as this has been a long standing issue that predated the
addition of kernfs lockdep annotations.
Link: http://lkml.kernel.org/r/157991441887.2763922.4770790047389427325.stgit@dwi…
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Cc: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory_hotplug.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
--- a/mm/memory_hotplug.c~mm-memory_hotplug-fix-remove_memory-lockdep-splat
+++ a/mm/memory_hotplug.c
@@ -1764,8 +1764,6 @@ static int __ref try_remove_memory(int n
BUG_ON(check_hotplug_memory_range(start, size));
- mem_hotplug_begin();
-
/*
* All memory blocks must be offlined before removing memory. Check
* whether all memory blocks in question are offline and return error
@@ -1778,9 +1776,14 @@ static int __ref try_remove_memory(int n
/* remove memmap entry */
firmware_map_remove(start, start + size, "System RAM");
- /* remove memory block devices before removing memory */
+ /*
+ * Memory block device removal under the device_hotplug_lock is
+ * a barrier against racing online attempts.
+ */
remove_memory_block_devices(start, size);
+ mem_hotplug_begin();
+
arch_remove_memory(nid, start, size, NULL);
memblock_free(start, size);
memblock_remove(start, size);
_
Patches currently in -mm which might be from dan.j.williams(a)intel.com are
The patch titled
Subject: mm/migrate.c: also overwrite error when it is bigger than zero
has been removed from the -mm tree. Its filename was
mm-migratec-also-overwrite-error-when-it-is-bigger-than-zero.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Wei Yang <richardw.yang(a)linux.intel.com>
Subject: mm/migrate.c: also overwrite error when it is bigger than zero
If we get here after successfully adding page to list, err would be 1 to
indicate the page is queued in the list.
Current code has two problems:
* on success, 0 is not returned
* on error, if add_page_for_migratioin() return 1, and the following err1
from do_move_pages_to_node() is set, the err1 is not returned since err
is 1
And these behaviors break the user interface.
Link: http://lkml.kernel.org/r/20200119065753.21694-1-richardw.yang@linux.intel.c…
Fixes: e0153fc2c760 ("mm: move_pages: return valid node id in status if the page is already on the target node").
Signed-off-by: Wei Yang <richardw.yang(a)linux.intel.com>
Acked-by: Yang Shi <yang.shi(a)linux.alibaba.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/migrate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/migrate.c~mm-migratec-also-overwrite-error-when-it-is-bigger-than-zero
+++ a/mm/migrate.c
@@ -1676,7 +1676,7 @@ out_flush:
err1 = do_move_pages_to_node(mm, &pagelist, current_node);
if (!err1)
err1 = store_status(status, start, current_node, i - start);
- if (!err)
+ if (err >= 0)
err = err1;
out:
return err;
_
Patches currently in -mm which might be from richardw.yang(a)linux.intel.com are
The patch titled
Subject: mm/sparse.c: reset section's mem_map when fully deactivated
has been removed from the -mm tree. Its filename was
mm-sparse-reset-sections-mem_map-when-fully-deactivated.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Pingfan Liu <kernelfans(a)gmail.com>
Subject: mm/sparse.c: reset section's mem_map when fully deactivated
After commit ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug"),
when a mem section is fully deactivated, section_mem_map still records the
section's start pfn, which is not used any more and will be reassigned
during re-addition.
In analogy with alloc/free pattern, it is better to clear all fields of
section_mem_map.
Beside this, it breaks the user space tool "makedumpfile" [1], which makes
assumption that a hot-removed section has mem_map as NULL, instead of
checking directly against SECTION_MARKED_PRESENT bit. (makedumpfile will
be better to change the assumption, and need a patch)
The bug can be reproduced on IBM POWERVM by "drmgr -c mem -r -q 5" ,
trigger a crash, and save vmcore by makedumpfile
[1]: makedumpfile, commit e73016540293 ("[v1.6.7] Update version")
Link: http://lkml.kernel.org/r/1579487594-28889-1-git-send-email-kernelfans@gmail…
Signed-off-by: Pingfan Liu <kernelfans(a)gmail.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Qian Cai <cai(a)lca.pw>
Cc: Kazuhito Hagio <k-hagio(a)ab.jp.nec.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/sparse.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/sparse.c~mm-sparse-reset-sections-mem_map-when-fully-deactivated
+++ a/mm/sparse.c
@@ -789,7 +789,7 @@ static void section_deactivate(unsigned
ms->usage = NULL;
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr);
+ ms->section_mem_map = (unsigned long)NULL;
}
if (section_is_early && memmap)
_
Patches currently in -mm which might be from kernelfans(a)gmail.com are
The patch titled
Subject: mm/mempolicy.c: fix out of bounds write in mpol_parse_str()
has been removed from the -mm tree. Its filename was
mm-mempolicyc-fix-out-of-bounds-write-in-mpol_parse_str.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Dan Carpenter <dan.carpenter(a)oracle.com>
Subject: mm/mempolicy.c: fix out of bounds write in mpol_parse_str()
What we are trying to do is change the '=' character to a NUL terminator
and then at the end of the function we restore it back to an '='. The
problem is there are two error paths where we jump to the end of the
function before we have replaced the '=' with NUL. We end up putting the
'=' in the wrong place (possibly one element before the start of the
buffer).
Link: http://lkml.kernel.org/r/20200115055426.vdjwvry44nfug7yy@kili.mountain
Reported-by: syzbot+e64a13c5369a194d67df(a)syzkaller.appspotmail.com
Fixes: 095f1fc4ebf3 ("mempolicy: rework shmem mpol parsing and display")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Dmitry Vyukov <dvyukov(a)google.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Dan Carpenter <dan.carpenter(a)oracle.com>
Cc: Lee Schermerhorn <lee.schermerhorn(a)hp.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mempolicy.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/mm/mempolicy.c~mm-mempolicyc-fix-out-of-bounds-write-in-mpol_parse_str
+++ a/mm/mempolicy.c
@@ -2821,6 +2821,9 @@ int mpol_parse_str(char *str, struct mem
char *flags = strchr(str, '=');
int err = 1, mode;
+ if (flags)
+ *flags++ = '\0'; /* terminate mode string */
+
if (nodelist) {
/* NUL-terminate mode or flags string */
*nodelist++ = '\0';
@@ -2831,9 +2834,6 @@ int mpol_parse_str(char *str, struct mem
} else
nodes_clear(nodes);
- if (flags)
- *flags++ = '\0'; /* terminate mode string */
-
mode = match_string(policy_modes, MPOL_MAX, str);
if (mode < 0)
goto out;
_
Patches currently in -mm which might be from dan.carpenter(a)oracle.com are
The patch titled
Subject: memcg: fix a crash in wb_workfn when a device disappears
has been removed from the -mm tree. Its filename was
memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: "Theodore Ts'o" <tytso(a)mit.edu>
Subject: memcg: fix a crash in wb_workfn when a device disappears
Without memcg, there is a one-to-one mapping between the bdi and
bdi_writeback structures. In this world, things are fairly
straightforward; the first thing bdi_unregister() does is to shutdown the
bdi_writeback structure (or wb), and part of that writeback ensures that
no other work queued against the wb, and that the wb is fully drained.
With memcg, however, there is a one-to-many relationship between the bdi
and bdi_writeback structures; that is, there are multiple wb objects which
can all point to a single bdi. There is a refcount which prevents the bdi
object from being released (and hence, unregistered). So in theory, the
bdi_unregister() *should* only get called once its refcount goes to zero
(bdi_put will drop the refcount, and when it is zero, release_bdi gets
called, which calls bdi_unregister).
Unfortunately, del_gendisk() in block/gen_hd.c never got the memo about
the Brave New memcg World, and calls bdi_unregister directly. It does
this without informing the file system, or the memcg code, or anything
else. This causes the root wb associated with the bdi to be unregistered,
but none of the memcg-specific wb's are shutdown. So when one of these
wb's are woken up to do delayed work, they try to dereference their
wb->bdi->dev to fetch the device name, but unfortunately bdi->dev is now
NULL, thanks to the bdi_unregister() called by del_gendisk(). As a
result, *boom*.
Fortunately, it looks like the rest of the writeback path is perfectly
happy with bdi->dev and bdi->owner being NULL, so the simplest fix is to
create a bdi_dev_name() function which can handle bdi->dev being NULL.
This also allows us to bulletproof the writeback tracepoints to prevent
them from dereferencing a NULL pointer and crashing the kernel if one is
tracing with memcg's enabled, and an iSCSI device dies or a USB storage
stick is pulled.
The most common way of triggering this will be hotremoval of a device
while writeback with memcg enabled is going on. It was triggering several
times a day in a heavily loaded production environment.
Google Bug Id: 145475544
Link: https://lore.kernel.org/r/20191227194829.150110-1-tytso@mit.edu
Link: http://lkml.kernel.org/r/20191228005211.163952-1-tytso@mit.edu
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: Chris Mason <clm(a)fb.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/fs-writeback.c | 2 -
include/linux/backing-dev.h | 10 +++++++
include/trace/events/writeback.h | 37 +++++++++++++----------------
mm/backing-dev.c | 1
4 files changed, 29 insertions(+), 21 deletions(-)
--- a/fs/fs-writeback.c~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/fs/fs-writeback.c
@@ -2063,7 +2063,7 @@ void wb_workfn(struct work_struct *work)
struct bdi_writeback, dwork);
long pages_written;
- set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
+ set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() ||
--- a/include/linux/backing-dev.h~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/include/linux/backing-dev.h
@@ -13,6 +13,7 @@
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
+#include <linux/device.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
@@ -504,4 +505,13 @@ static inline int bdi_rw_congested(struc
(1 << WB_async_congested));
}
+extern const char *bdi_unknown_name;
+
+static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
+{
+ if (!bdi || !bdi->dev)
+ return bdi_unknown_name;
+ return dev_name(bdi->dev);
+}
+
#endif /* _LINUX_BACKING_DEV_H */
--- a/include/trace/events/writeback.h~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/include/trace/events/writeback.h
@@ -67,8 +67,8 @@ DECLARE_EVENT_CLASS(writeback_page_templ
TP_fast_assign(
strscpy_pad(__entry->name,
- mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)",
- 32);
+ bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
+ NULL), 32);
__entry->ino = mapping ? mapping->host->i_ino : 0;
__entry->index = page->index;
),
@@ -111,8 +111,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inod
struct backing_dev_info *bdi = inode_to_bdi(inode);
/* may be called for files on pseudo FSes w/ unregistered bdi */
- strscpy_pad(__entry->name,
- bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->flags = flags;
@@ -193,7 +192,7 @@ TRACE_EVENT(inode_foreign_history,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(inode_to_bdi(inode)->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
__entry->history = history;
@@ -222,7 +221,7 @@ TRACE_EVENT(inode_switch_wbs,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(old_wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(old_wb->bdi), 32);
__entry->ino = inode->i_ino;
__entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb);
__entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb);
@@ -255,7 +254,7 @@ TRACE_EVENT(track_foreign_dirty,
struct address_space *mapping = page_mapping(page);
struct inode *inode = mapping ? mapping->host : NULL;
- strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->bdi_id = wb->bdi->id;
__entry->ino = inode ? inode->i_ino : 0;
__entry->memcg_id = wb->memcg_css->id;
@@ -288,7 +287,7 @@ TRACE_EVENT(flush_foreign,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
__entry->frn_bdi_id = frn_bdi_id;
__entry->frn_memcg_id = frn_memcg_id;
@@ -318,7 +317,7 @@ DECLARE_EVENT_CLASS(writeback_write_inod
TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->sync_mode = wbc->sync_mode;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
@@ -361,9 +360,7 @@ DECLARE_EVENT_CLASS(writeback_work_class
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
- strscpy_pad(__entry->name,
- wb->bdi->dev ? dev_name(wb->bdi->dev) :
- "(unknown)", 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->nr_pages = work->nr_pages;
__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
__entry->sync_mode = work->sync_mode;
@@ -416,7 +413,7 @@ DECLARE_EVENT_CLASS(writeback_class,
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: cgroup_ino=%lu",
@@ -438,7 +435,7 @@ TRACE_EVENT(writeback_bdi_register,
__array(char, name, 32)
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
),
TP_printk("bdi %s",
__entry->name
@@ -463,7 +460,7 @@ DECLARE_EVENT_CLASS(wbc_class,
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
__entry->sync_mode = wbc->sync_mode;
@@ -514,7 +511,7 @@ TRACE_EVENT(writeback_queue_io,
),
TP_fast_assign(
unsigned long *older_than_this = work->older_than_this;
- strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->older = older_than_this ? *older_than_this : 0;
__entry->age = older_than_this ?
(jiffies - *older_than_this) * 1000 / HZ : -1;
@@ -600,7 +597,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
),
TP_fast_assign(
- strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->write_bw = KBps(wb->write_bandwidth);
__entry->avg_write_bw = KBps(wb->avg_write_bandwidth);
__entry->dirty_rate = KBps(dirty_rate);
@@ -665,7 +662,7 @@ TRACE_EVENT(balance_dirty_pages,
TP_fast_assign(
unsigned long freerun = (thresh + bg_thresh) / 2;
- strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->limit = global_wb_domain.dirty_limit;
__entry->setpoint = (global_wb_domain.dirty_limit +
@@ -726,7 +723,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
@@ -800,7 +797,7 @@ DECLARE_EVENT_CLASS(writeback_single_ino
TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
--- a/mm/backing-dev.c~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/mm/backing-dev.c
@@ -21,6 +21,7 @@ struct backing_dev_info noop_backing_dev
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
static struct class *bdi_class;
+const char *bdi_unknown_name = "(unknown)";
/*
* bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
_
Patches currently in -mm which might be from tytso(a)mit.edu are
On Fri, Jan 31, 2020 at 6:17 PM Z R <zdenda.rampas(a)gmail.com> wrote:
>
> Hi Benjamin,
> in last log touchpad.log (omg, should be keyboard.log), I pressed fn-f3 multiple times. I got one two liner:
>
> # ReportID: 3 / Wireless Radio Button: 0 | #
> E: 000007.606583 2 03 00
great, thanks.
>
> every time i release f3. Does not matter what is happening with fn-key. (could be released already or still pushed down). This log was collected with last patch from Hans applied.
Actually, it doesn't really matter if the patch from Hans was applied
or not. I am looking at the raw event from the device before kernel
processing :)
May I ask you one more thing: can you run `libinput record` (as root)
on the touchpad? I need to know how many fingers the touchpad is
capable to detect, as right now, my tests are failing because of that.
>
> Sorry for the mess I caused :-) I still don't get how you guys manage to have your emails so well polished and readable.
No worries. Having a responsive user is much more valuable than
someone who takes ages to reply even in a clear and well polished
message :)
Cheers,
Benjamin
>
> Bye
> Zdeněk
>
> pá 31. 1. 2020 v 18:00 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com> napsal:
>>
>> On Fri, Jan 31, 2020 at 5:09 PM Z R <zdenda.rampas(a)gmail.com> wrote:
>> >
>> > I believe I pressed wifi button on both replays for keyboard.
>>
>> Yep, I can see that. Just to double check, on the last log, you
>> pressed the wifi button twice?
>>
>> Anyway, thanks for all the logs, I should have enough to implement the
>> regression tests.
>>
>> Cheers,
>> Benjamin
>>
>> >
>> > With latest patch from Hans on top of v5.5.0 touchpads "two finger scrolling" is working again. Attaching current hid-record for keyboard with Wifi button pressed. Events in log appeared after f3 button was "released".
>> >
>> > Thanks
>> >
>> > Zdeněk
>> >
>> > pá 31. 1. 2020 v 16:45 odesílatel Hans de Goede <hdegoede(a)redhat.com> napsal:
>> >>
>> >> Hi,
>> >>
>> >> On 1/31/20 4:38 PM, Z R wrote:
>> >> > Hi Benjamin,
>> >> > hid-record for keyboard and touchpad. With Commit 8f18eca9ebc5 reverted and from unmodified kernel.
>> >> >
>> >> > I hope it is what you asked for :-)
>> >> >
>> >> > Currently waiting for reworked patch from Hans.
>> >>
>> >> I just send you the reworked patch.
>> >>
>> >> Does the recordning include pressing of the wlan on/off key (Fn + F3 I believe) ?
>> >> That is the whole reason why the special hid-ite driver is necessary.
>> >>
>> >> Benjamin about the wlan on/off key. AFAICR on a press + release of the key a
>> >> single hid input report for the generic-desktop Wireless Radio Controls group
>> >> is send. This input-report only has the one button with usage code HID_GD_RFKILL_BTN
>> >> in there and it is always 0. It is as if the input-report is only send on release
>> >> and not on press. So the hid-ite code emulates a press + release whenever the
>> >> input-report is send.
>> >>
>> >> IOW the receiving of the input report is (ab)used as indication of the button
>> >> having been pressed.
>> >>
>> >> Regards,
>> >>
>> >> Hans
>> >>
>> >>
>> >> >
>> >> > Bye for now
>> >> > Zdeněk
>> >> >
>> >> > pá 31. 1. 2020 v 15:12 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com <mailto:benjamin.tissoires@redhat.com>> napsal:
>> >> >
>> >> > On Fri, Jan 31, 2020 at 3:04 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> >> > >
>> >> > > Hi,
>> >> > >
>> >> > > On 1/31/20 2:54 PM, Benjamin Tissoires wrote:
>> >> > > > On Fri, Jan 31, 2020 at 2:41 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> >> > > >>
>> >> > > >> Hi,
>> >> > > >>
>> >> > > >> On 1/31/20 2:10 PM, Benjamin Tissoires wrote:
>> >> > > >>> Hi Hans,
>> >> > > >>>
>> >> > > >>> On Fri, Jan 31, 2020 at 1:46 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> >> > > >>>>
>> >> > > >>>> Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
>> >> > > >>>> dock") added the USB id for the Acer SW5-012's keyboard dock to the
>> >> > > >>>> hid-ite driver to fix the rfkill driver not working.
>> >> > > >>>>
>> >> > > >>>> Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
>> >> > > >>>> "Wireless Radio Control" bits which need the special hid-ite driver on the
>> >> > > >>>> second USB interface (the mouse interface) and their touchpad only supports
>> >> > > >>>> mouse emulation, so using generic hid-input handling for anything but
>> >> > > >>>> the "Wireless Radio Control" bits is fine. On these devices we simply bind
>> >> > > >>>> to all USB interfaces.
>> >> > > >>>>
>> >> > > >>>> But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
>> >> > > >>>> (SW5-012)'s touchpad not only does mouse emulation it also supports
>> >> > > >>>> HID-multitouch and all the keys including the "Wireless Radio Control"
>> >> > > >>>> bits have been moved to the first USB interface (the keyboard intf).
>> >> > > >>>>
>> >> > > >>>> So we need hid-ite to handle the first (keyboard) USB interface and have
>> >> > > >>>> it NOT bind to the second (mouse) USB interface so that that can be
>> >> > > >>>> handled by hid-multitouch.c and we get proper multi-touch support.
>> >> > > >>>>
>> >> > > >>>> This commit adds a match callback to hid-ite which makes it only
>> >> > > >>>> match the first USB interface when running on the Acer SW5-012,
>> >> > > >>>> fixing the regression to mouse-emulation mode introduced by adding the
>> >> > > >>>> keyboard dock USB id.
>> >> > > >>>>
>> >> > > >>>> Note the match function only does the special only bind to the first
>> >> > > >>>> USB interface on the Acer SW5-012, on other devices the hid-ite driver
>> >> > > >>>> actually must bind to the second interface as that is where the
>> >> > > >>>> "Wireless Radio Control" bits are.
>> >> > > >>>
>> >> > > >>> This is not a full review, but a couple of things that popped out
>> >> > > >>> while scrolling through the patch.
>> >> > > >>>
>> >> > > >>>>
>> >> > > >>>> Cc: stable(a)vger.kernel.org <mailto:stable@vger.kernel.org>
>> >> > > >>>> Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
>> >> > > >>>> Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com <mailto:zdenda.rampas@gmail.com>>
>> >> > > >>>> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>>
>> >> > > >>>> ---
>> >> > > >>>> drivers/hid/hid-ite.c | 34 ++++++++++++++++++++++++++++++++++
>> >> > > >>>> 1 file changed, 34 insertions(+)
>> >> > > >>>>
>> >> > > >>>> diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
>> >> > > >>>> index c436e12feb23..69a4ddfd033d 100644
>> >> > > >>>> --- a/drivers/hid/hid-ite.c
>> >> > > >>>> +++ b/drivers/hid/hid-ite.c
>> >> > > >>>> @@ -8,9 +8,12 @@
>> >> > > >>>> #include <linux/input.h>
>> >> > > >>>> #include <linux/hid.h>
>> >> > > >>>> #include <linux/module.h>
>> >> > > >>>> +#include <linux/usb.h>
>> >> > > >>>>
>> >> > > >>>> #include "hid-ids.h"
>> >> > > >>>>
>> >> > > >>>> +#define ITE8595_KBD_USB_INTF 0
>> >> > > >>>> +
>> >> > > >>>> static int ite_event(struct hid_device *hdev, struct hid_field *field,
>> >> > > >>>> struct hid_usage *usage, __s32 value)
>> >> > > >>>> {
>> >> > > >>>> @@ -37,6 +40,36 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
>> >> > > >>>> return 0;
>> >> > > >>>> }
>> >> > > >>>>
>> >> > > >>>> +static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
>> >> > > >>>> +{
>> >> > > >>>> + struct usb_interface *intf;
>> >> > > >>>> +
>> >> > > >>>> + if (ignore_special_driver)
>> >> > > >>>> + return false;
>> >> > > >>>> +
>> >> > > >>>> + /*
>> >> > > >>>> + * Most keyboard docks with an ITE 8595 keyboard/touchpad controller
>> >> > > >>>> + * have the "Wireless Radio Control" bits which need this special
>> >> > > >>>> + * driver on the second USB interface (the mouse interface). On
>> >> > > >>>> + * these devices we simply bind to all USB interfaces.
>> >> > > >>>> + *
>> >> > > >>>> + * The Acer Aspire Switch 10 (SW5-012) is special, its touchpad
>> >> > > >>>> + * not only does mouse emulation it also supports HID-multitouch
>> >> > > >>>> + * and all the keys including the "Wireless Radio Control" bits
>> >> > > >>>> + * have been moved to the first USB interface (the keyboard intf).
>> >> > > >>>> + *
>> >> > > >>>> + * We want the hid-multitouch driver to bind to the touchpad, so on
>> >> > > >>>> + * the Acer SW5-012 we should only bind to the keyboard USB intf.
>> >> > > >>>> + */
>> >> > > >>>> + if (hdev->bus != BUS_USB || hdev->vendor != USB_VENDOR_ID_SYNAPTICS ||
>> >> > > >>>> + hdev->product != USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012)
>> >> > > >>>
>> >> > > >>> Isn't there an existing matching function we can use here, instead of
>> >> > > >>> checking each individual field?
>> >> > > >>
>> >> > > >> There is hid_match_one_id() but that is not exported (can be fixed) and it
>> >> > > >> requires a struct hid_device_id, which either requires declaring an extra
>> >> > > >> standalone struct hid_device_id for the SW5-012 kbd-dock, or hardcoding an
>> >> > > >> index into the existing hid_device_id array for the driver (with the hardcoding
>> >> > > >> being error prone, so not a good idea).
>> >> > > >>
>> >> > > >> Given the problems with using hid_match_one_id() I decided to just go with
>> >> > > >> the above.
>> >> > > >
>> >> > > > right. An other solution would be to have a local macro/function that
>> >> > > > does that. Because as soon as you start adding a quirk, an other comes
>> >> > > > right after.
>> >> > > >
>> >> > > >>
>> >> > > >> But see below.
>> >> > > >>
>> >> > > >>>
>> >> > > >>>> + return true;
>> >> > > >>>> +
>> >> > > >>>> + intf = to_usb_interface(hdev->dev.parent);
>> >> > > >>>
>> >> > > >>> And this is oops-prone. You need:
>> >> > > >>> - ensure hid_is_using_ll_driver(hdev, &usb_hid_driver) returns true.
>> >> > > >>> - add a dependency on USBHID in the KConfig now that you are checking
>> >> > > >>> on the USB transport layer.
>> >> > > >>>
>> >> > > >>> That being said, I would love instead:
>> >> > > >>> - to have a non USB version of this match, where you decide which
>> >> > > >>> component needs to be handled based on the report descriptor
>> >> > > >>
>> >> > > >> Actually your idea to use the desciptors is not bad, but since what
>> >> > > >> we really want is to not bind to the interface which is marked for the
>> >> > > >> hid-multitouch driver I just realized we can just check that.
>> >> > > >>
>> >> > > >> So how about:
>> >> > > >>
>> >> > > >> static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
>> >> > > >> {
>> >> > > >> if (ignore_special_driver)
>> >> > > >> return false;
>> >> > > >>
>> >> > > >> /*
>> >> > > >> * Some keyboard docks with an ITE 8595 keyboard/touchpad controller
>> >> > > >> * support the HID multitouch protocol for the touchpad, in that
>> >> > > >> * case the "Wireless Radio Control" bits which we care about are
>> >> > > >> * on the other interface; and we should not bind to the multitouch
>> >> > > >> * capable interface as that breaks multitouch support.
>> >> > > >> */
>> >> > > >> return hdev->group != HID_GROUP_MULTITOUCH_WIN_8;
>> >> > > >> }
>> >> > > >
>> >> > > > Yep, I like that very much :)
>> >> > >
>> >> > > Actually if we want to check the group and there are only 2 interfaces we do
>> >> > > not need to use the match callback at all, w e can simply match on the
>> >> > > group of the interface which we do want:
>> >> > >
>> >> > > diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
>> >> > > index db0f35be5a8b..21bd48f16033 100644
>> >> > > --- a/drivers/hid/hid-ite.c
>> >> > > +++ b/drivers/hid/hid-ite.c
>> >> > > @@ -56,8 +56,9 @@ static const struct hid_device_id ite_devices[] = {
>> >> > > { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
>> >> > > { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
>> >> > > /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
>> >> > > - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
>> >> > > - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
>> >> > > + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
>> >> > > + USB_VENDOR_ID_SYNAPTICS,
>> >> > > + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
>> >> > > { }
>> >> > > };
>> >> > > MODULE_DEVICE_TABLE(hid, ite_devices);
>> >> > >
>> >> > > Much cleaner
>> >> >
>> >> > yep
>> >> >
>> >> > > (and now I don't need to write a test, which is always
>> >> > > a good motivation to come up with a cleaner solution :)
>> >> >
>> >> > Hehe, too bad, you already picked up my curiosity on this one, and I
>> >> > really would like to see the report descriptors and some events of the
>> >> > keys that are fixed by hid-ite.c.
>> >> > <with a low voice>This will be a hard requirement to accept this patch </joke>.
>> >> >
>> >> > More seriously, Zdeněk, can you run hid-recorder from
>> >> > https://gitlab.freedesktop.org/libevdev/hid-tools/ and provide me the
>> >> > report descriptor for all of your ITE HID devices? I'll add the
>> >> > matching tests in hid-tools and be sure we do not regress in the
>> >> > future.
>> >> >
>> >> > >
>> >> > > Let me turn this into a proper patch and then I will send that to
>> >> > > Zdeněk (off-list) for him to test (note don't worry if you do
>> >> > > not have time to test this weekend, then I'll do it on Monday).
>> >> > >
>> >> > > Regards,
>> >> > >
>> >> > > Hans
>> >> > >
>> >> > > p.s.
>> >> > >
>> >> > > 1. My train is approaching Brussels (Fosdem) so my email response
>> >> > > time will soon become irregular.
>> >> >
>> >> > How dare you? :)
>> >> >
>> >> > >
>> >> > > 2. Benjamin will you be at Fosdem too ?
>> >> > >
>> >> >
>> >> > Unfortunately no. Already got my quota of meeting people for this year
>> >> > between Kernel Recipes in September, XDC in October and LCA last week.
>> >> > So I need to keep in a quiet environment for a little bit :)
>> >> >
>> >> > Cheers,
>> >> > Benjamin
>> >> >
>> >>
>>
On Fri, Jan 31, 2020 at 5:09 PM Z R <zdenda.rampas(a)gmail.com> wrote:
>
> I believe I pressed wifi button on both replays for keyboard.
Yep, I can see that. Just to double check, on the last log, you
pressed the wifi button twice?
Anyway, thanks for all the logs, I should have enough to implement the
regression tests.
Cheers,
Benjamin
>
> With latest patch from Hans on top of v5.5.0 touchpads "two finger scrolling" is working again. Attaching current hid-record for keyboard with Wifi button pressed. Events in log appeared after f3 button was "released".
>
> Thanks
>
> Zdeněk
>
> pá 31. 1. 2020 v 16:45 odesílatel Hans de Goede <hdegoede(a)redhat.com> napsal:
>>
>> Hi,
>>
>> On 1/31/20 4:38 PM, Z R wrote:
>> > Hi Benjamin,
>> > hid-record for keyboard and touchpad. With Commit 8f18eca9ebc5 reverted and from unmodified kernel.
>> >
>> > I hope it is what you asked for :-)
>> >
>> > Currently waiting for reworked patch from Hans.
>>
>> I just send you the reworked patch.
>>
>> Does the recordning include pressing of the wlan on/off key (Fn + F3 I believe) ?
>> That is the whole reason why the special hid-ite driver is necessary.
>>
>> Benjamin about the wlan on/off key. AFAICR on a press + release of the key a
>> single hid input report for the generic-desktop Wireless Radio Controls group
>> is send. This input-report only has the one button with usage code HID_GD_RFKILL_BTN
>> in there and it is always 0. It is as if the input-report is only send on release
>> and not on press. So the hid-ite code emulates a press + release whenever the
>> input-report is send.
>>
>> IOW the receiving of the input report is (ab)used as indication of the button
>> having been pressed.
>>
>> Regards,
>>
>> Hans
>>
>>
>> >
>> > Bye for now
>> > Zdeněk
>> >
>> > pá 31. 1. 2020 v 15:12 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com <mailto:benjamin.tissoires@redhat.com>> napsal:
>> >
>> > On Fri, Jan 31, 2020 at 3:04 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> > >
>> > > Hi,
>> > >
>> > > On 1/31/20 2:54 PM, Benjamin Tissoires wrote:
>> > > > On Fri, Jan 31, 2020 at 2:41 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> > > >>
>> > > >> Hi,
>> > > >>
>> > > >> On 1/31/20 2:10 PM, Benjamin Tissoires wrote:
>> > > >>> Hi Hans,
>> > > >>>
>> > > >>> On Fri, Jan 31, 2020 at 1:46 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
>> > > >>>>
>> > > >>>> Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
>> > > >>>> dock") added the USB id for the Acer SW5-012's keyboard dock to the
>> > > >>>> hid-ite driver to fix the rfkill driver not working.
>> > > >>>>
>> > > >>>> Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
>> > > >>>> "Wireless Radio Control" bits which need the special hid-ite driver on the
>> > > >>>> second USB interface (the mouse interface) and their touchpad only supports
>> > > >>>> mouse emulation, so using generic hid-input handling for anything but
>> > > >>>> the "Wireless Radio Control" bits is fine. On these devices we simply bind
>> > > >>>> to all USB interfaces.
>> > > >>>>
>> > > >>>> But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
>> > > >>>> (SW5-012)'s touchpad not only does mouse emulation it also supports
>> > > >>>> HID-multitouch and all the keys including the "Wireless Radio Control"
>> > > >>>> bits have been moved to the first USB interface (the keyboard intf).
>> > > >>>>
>> > > >>>> So we need hid-ite to handle the first (keyboard) USB interface and have
>> > > >>>> it NOT bind to the second (mouse) USB interface so that that can be
>> > > >>>> handled by hid-multitouch.c and we get proper multi-touch support.
>> > > >>>>
>> > > >>>> This commit adds a match callback to hid-ite which makes it only
>> > > >>>> match the first USB interface when running on the Acer SW5-012,
>> > > >>>> fixing the regression to mouse-emulation mode introduced by adding the
>> > > >>>> keyboard dock USB id.
>> > > >>>>
>> > > >>>> Note the match function only does the special only bind to the first
>> > > >>>> USB interface on the Acer SW5-012, on other devices the hid-ite driver
>> > > >>>> actually must bind to the second interface as that is where the
>> > > >>>> "Wireless Radio Control" bits are.
>> > > >>>
>> > > >>> This is not a full review, but a couple of things that popped out
>> > > >>> while scrolling through the patch.
>> > > >>>
>> > > >>>>
>> > > >>>> Cc: stable(a)vger.kernel.org <mailto:stable@vger.kernel.org>
>> > > >>>> Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
>> > > >>>> Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com <mailto:zdenda.rampas@gmail.com>>
>> > > >>>> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>>
>> > > >>>> ---
>> > > >>>> drivers/hid/hid-ite.c | 34 ++++++++++++++++++++++++++++++++++
>> > > >>>> 1 file changed, 34 insertions(+)
>> > > >>>>
>> > > >>>> diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
>> > > >>>> index c436e12feb23..69a4ddfd033d 100644
>> > > >>>> --- a/drivers/hid/hid-ite.c
>> > > >>>> +++ b/drivers/hid/hid-ite.c
>> > > >>>> @@ -8,9 +8,12 @@
>> > > >>>> #include <linux/input.h>
>> > > >>>> #include <linux/hid.h>
>> > > >>>> #include <linux/module.h>
>> > > >>>> +#include <linux/usb.h>
>> > > >>>>
>> > > >>>> #include "hid-ids.h"
>> > > >>>>
>> > > >>>> +#define ITE8595_KBD_USB_INTF 0
>> > > >>>> +
>> > > >>>> static int ite_event(struct hid_device *hdev, struct hid_field *field,
>> > > >>>> struct hid_usage *usage, __s32 value)
>> > > >>>> {
>> > > >>>> @@ -37,6 +40,36 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
>> > > >>>> return 0;
>> > > >>>> }
>> > > >>>>
>> > > >>>> +static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
>> > > >>>> +{
>> > > >>>> + struct usb_interface *intf;
>> > > >>>> +
>> > > >>>> + if (ignore_special_driver)
>> > > >>>> + return false;
>> > > >>>> +
>> > > >>>> + /*
>> > > >>>> + * Most keyboard docks with an ITE 8595 keyboard/touchpad controller
>> > > >>>> + * have the "Wireless Radio Control" bits which need this special
>> > > >>>> + * driver on the second USB interface (the mouse interface). On
>> > > >>>> + * these devices we simply bind to all USB interfaces.
>> > > >>>> + *
>> > > >>>> + * The Acer Aspire Switch 10 (SW5-012) is special, its touchpad
>> > > >>>> + * not only does mouse emulation it also supports HID-multitouch
>> > > >>>> + * and all the keys including the "Wireless Radio Control" bits
>> > > >>>> + * have been moved to the first USB interface (the keyboard intf).
>> > > >>>> + *
>> > > >>>> + * We want the hid-multitouch driver to bind to the touchpad, so on
>> > > >>>> + * the Acer SW5-012 we should only bind to the keyboard USB intf.
>> > > >>>> + */
>> > > >>>> + if (hdev->bus != BUS_USB || hdev->vendor != USB_VENDOR_ID_SYNAPTICS ||
>> > > >>>> + hdev->product != USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012)
>> > > >>>
>> > > >>> Isn't there an existing matching function we can use here, instead of
>> > > >>> checking each individual field?
>> > > >>
>> > > >> There is hid_match_one_id() but that is not exported (can be fixed) and it
>> > > >> requires a struct hid_device_id, which either requires declaring an extra
>> > > >> standalone struct hid_device_id for the SW5-012 kbd-dock, or hardcoding an
>> > > >> index into the existing hid_device_id array for the driver (with the hardcoding
>> > > >> being error prone, so not a good idea).
>> > > >>
>> > > >> Given the problems with using hid_match_one_id() I decided to just go with
>> > > >> the above.
>> > > >
>> > > > right. An other solution would be to have a local macro/function that
>> > > > does that. Because as soon as you start adding a quirk, an other comes
>> > > > right after.
>> > > >
>> > > >>
>> > > >> But see below.
>> > > >>
>> > > >>>
>> > > >>>> + return true;
>> > > >>>> +
>> > > >>>> + intf = to_usb_interface(hdev->dev.parent);
>> > > >>>
>> > > >>> And this is oops-prone. You need:
>> > > >>> - ensure hid_is_using_ll_driver(hdev, &usb_hid_driver) returns true.
>> > > >>> - add a dependency on USBHID in the KConfig now that you are checking
>> > > >>> on the USB transport layer.
>> > > >>>
>> > > >>> That being said, I would love instead:
>> > > >>> - to have a non USB version of this match, where you decide which
>> > > >>> component needs to be handled based on the report descriptor
>> > > >>
>> > > >> Actually your idea to use the desciptors is not bad, but since what
>> > > >> we really want is to not bind to the interface which is marked for the
>> > > >> hid-multitouch driver I just realized we can just check that.
>> > > >>
>> > > >> So how about:
>> > > >>
>> > > >> static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
>> > > >> {
>> > > >> if (ignore_special_driver)
>> > > >> return false;
>> > > >>
>> > > >> /*
>> > > >> * Some keyboard docks with an ITE 8595 keyboard/touchpad controller
>> > > >> * support the HID multitouch protocol for the touchpad, in that
>> > > >> * case the "Wireless Radio Control" bits which we care about are
>> > > >> * on the other interface; and we should not bind to the multitouch
>> > > >> * capable interface as that breaks multitouch support.
>> > > >> */
>> > > >> return hdev->group != HID_GROUP_MULTITOUCH_WIN_8;
>> > > >> }
>> > > >
>> > > > Yep, I like that very much :)
>> > >
>> > > Actually if we want to check the group and there are only 2 interfaces we do
>> > > not need to use the match callback at all, w e can simply match on the
>> > > group of the interface which we do want:
>> > >
>> > > diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
>> > > index db0f35be5a8b..21bd48f16033 100644
>> > > --- a/drivers/hid/hid-ite.c
>> > > +++ b/drivers/hid/hid-ite.c
>> > > @@ -56,8 +56,9 @@ static const struct hid_device_id ite_devices[] = {
>> > > { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
>> > > { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
>> > > /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
>> > > - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
>> > > - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
>> > > + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
>> > > + USB_VENDOR_ID_SYNAPTICS,
>> > > + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
>> > > { }
>> > > };
>> > > MODULE_DEVICE_TABLE(hid, ite_devices);
>> > >
>> > > Much cleaner
>> >
>> > yep
>> >
>> > > (and now I don't need to write a test, which is always
>> > > a good motivation to come up with a cleaner solution :)
>> >
>> > Hehe, too bad, you already picked up my curiosity on this one, and I
>> > really would like to see the report descriptors and some events of the
>> > keys that are fixed by hid-ite.c.
>> > <with a low voice>This will be a hard requirement to accept this patch </joke>.
>> >
>> > More seriously, Zdeněk, can you run hid-recorder from
>> > https://gitlab.freedesktop.org/libevdev/hid-tools/ and provide me the
>> > report descriptor for all of your ITE HID devices? I'll add the
>> > matching tests in hid-tools and be sure we do not regress in the
>> > future.
>> >
>> > >
>> > > Let me turn this into a proper patch and then I will send that to
>> > > Zdeněk (off-list) for him to test (note don't worry if you do
>> > > not have time to test this weekend, then I'll do it on Monday).
>> > >
>> > > Regards,
>> > >
>> > > Hans
>> > >
>> > > p.s.
>> > >
>> > > 1. My train is approaching Brussels (Fosdem) so my email response
>> > > time will soon become irregular.
>> >
>> > How dare you? :)
>> >
>> > >
>> > > 2. Benjamin will you be at Fosdem too ?
>> > >
>> >
>> > Unfortunately no. Already got my quota of meeting people for this year
>> > between Kernel Recipes in September, XDC in October and LCA last week.
>> > So I need to keep in a quiet environment for a little bit :)
>> >
>> > Cheers,
>> > Benjamin
>> >
>>
Hi,
On 1/31/20 4:38 PM, Z R wrote:
> Hi Benjamin,
> hid-record for keyboard and touchpad. With Commit 8f18eca9ebc5 reverted and from unmodified kernel.
>
> I hope it is what you asked for :-)
>
> Currently waiting for reworked patch from Hans.
I just send you the reworked patch.
Does the recordning include pressing of the wlan on/off key (Fn + F3 I believe) ?
That is the whole reason why the special hid-ite driver is necessary.
Benjamin about the wlan on/off key. AFAICR on a press + release of the key a
single hid input report for the generic-desktop Wireless Radio Controls group
is send. This input-report only has the one button with usage code HID_GD_RFKILL_BTN
in there and it is always 0. It is as if the input-report is only send on release
and not on press. So the hid-ite code emulates a press + release whenever the
input-report is send.
IOW the receiving of the input report is (ab)used as indication of the button
having been pressed.
Regards,
Hans
>
> Bye for now
> Zdeněk
>
> pá 31. 1. 2020 v 15:12 odesílatel Benjamin Tissoires <benjamin.tissoires(a)redhat.com <mailto:benjamin.tissoires@redhat.com>> napsal:
>
> On Fri, Jan 31, 2020 at 3:04 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
> >
> > Hi,
> >
> > On 1/31/20 2:54 PM, Benjamin Tissoires wrote:
> > > On Fri, Jan 31, 2020 at 2:41 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
> > >>
> > >> Hi,
> > >>
> > >> On 1/31/20 2:10 PM, Benjamin Tissoires wrote:
> > >>> Hi Hans,
> > >>>
> > >>> On Fri, Jan 31, 2020 at 1:46 PM Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>> wrote:
> > >>>>
> > >>>> Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
> > >>>> dock") added the USB id for the Acer SW5-012's keyboard dock to the
> > >>>> hid-ite driver to fix the rfkill driver not working.
> > >>>>
> > >>>> Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
> > >>>> "Wireless Radio Control" bits which need the special hid-ite driver on the
> > >>>> second USB interface (the mouse interface) and their touchpad only supports
> > >>>> mouse emulation, so using generic hid-input handling for anything but
> > >>>> the "Wireless Radio Control" bits is fine. On these devices we simply bind
> > >>>> to all USB interfaces.
> > >>>>
> > >>>> But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
> > >>>> (SW5-012)'s touchpad not only does mouse emulation it also supports
> > >>>> HID-multitouch and all the keys including the "Wireless Radio Control"
> > >>>> bits have been moved to the first USB interface (the keyboard intf).
> > >>>>
> > >>>> So we need hid-ite to handle the first (keyboard) USB interface and have
> > >>>> it NOT bind to the second (mouse) USB interface so that that can be
> > >>>> handled by hid-multitouch.c and we get proper multi-touch support.
> > >>>>
> > >>>> This commit adds a match callback to hid-ite which makes it only
> > >>>> match the first USB interface when running on the Acer SW5-012,
> > >>>> fixing the regression to mouse-emulation mode introduced by adding the
> > >>>> keyboard dock USB id.
> > >>>>
> > >>>> Note the match function only does the special only bind to the first
> > >>>> USB interface on the Acer SW5-012, on other devices the hid-ite driver
> > >>>> actually must bind to the second interface as that is where the
> > >>>> "Wireless Radio Control" bits are.
> > >>>
> > >>> This is not a full review, but a couple of things that popped out
> > >>> while scrolling through the patch.
> > >>>
> > >>>>
> > >>>> Cc: stable(a)vger.kernel.org <mailto:stable@vger.kernel.org>
> > >>>> Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
> > >>>> Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com <mailto:zdenda.rampas@gmail.com>>
> > >>>> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com <mailto:hdegoede@redhat.com>>
> > >>>> ---
> > >>>> drivers/hid/hid-ite.c | 34 ++++++++++++++++++++++++++++++++++
> > >>>> 1 file changed, 34 insertions(+)
> > >>>>
> > >>>> diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
> > >>>> index c436e12feb23..69a4ddfd033d 100644
> > >>>> --- a/drivers/hid/hid-ite.c
> > >>>> +++ b/drivers/hid/hid-ite.c
> > >>>> @@ -8,9 +8,12 @@
> > >>>> #include <linux/input.h>
> > >>>> #include <linux/hid.h>
> > >>>> #include <linux/module.h>
> > >>>> +#include <linux/usb.h>
> > >>>>
> > >>>> #include "hid-ids.h"
> > >>>>
> > >>>> +#define ITE8595_KBD_USB_INTF 0
> > >>>> +
> > >>>> static int ite_event(struct hid_device *hdev, struct hid_field *field,
> > >>>> struct hid_usage *usage, __s32 value)
> > >>>> {
> > >>>> @@ -37,6 +40,36 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
> > >>>> return 0;
> > >>>> }
> > >>>>
> > >>>> +static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
> > >>>> +{
> > >>>> + struct usb_interface *intf;
> > >>>> +
> > >>>> + if (ignore_special_driver)
> > >>>> + return false;
> > >>>> +
> > >>>> + /*
> > >>>> + * Most keyboard docks with an ITE 8595 keyboard/touchpad controller
> > >>>> + * have the "Wireless Radio Control" bits which need this special
> > >>>> + * driver on the second USB interface (the mouse interface). On
> > >>>> + * these devices we simply bind to all USB interfaces.
> > >>>> + *
> > >>>> + * The Acer Aspire Switch 10 (SW5-012) is special, its touchpad
> > >>>> + * not only does mouse emulation it also supports HID-multitouch
> > >>>> + * and all the keys including the "Wireless Radio Control" bits
> > >>>> + * have been moved to the first USB interface (the keyboard intf).
> > >>>> + *
> > >>>> + * We want the hid-multitouch driver to bind to the touchpad, so on
> > >>>> + * the Acer SW5-012 we should only bind to the keyboard USB intf.
> > >>>> + */
> > >>>> + if (hdev->bus != BUS_USB || hdev->vendor != USB_VENDOR_ID_SYNAPTICS ||
> > >>>> + hdev->product != USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012)
> > >>>
> > >>> Isn't there an existing matching function we can use here, instead of
> > >>> checking each individual field?
> > >>
> > >> There is hid_match_one_id() but that is not exported (can be fixed) and it
> > >> requires a struct hid_device_id, which either requires declaring an extra
> > >> standalone struct hid_device_id for the SW5-012 kbd-dock, or hardcoding an
> > >> index into the existing hid_device_id array for the driver (with the hardcoding
> > >> being error prone, so not a good idea).
> > >>
> > >> Given the problems with using hid_match_one_id() I decided to just go with
> > >> the above.
> > >
> > > right. An other solution would be to have a local macro/function that
> > > does that. Because as soon as you start adding a quirk, an other comes
> > > right after.
> > >
> > >>
> > >> But see below.
> > >>
> > >>>
> > >>>> + return true;
> > >>>> +
> > >>>> + intf = to_usb_interface(hdev->dev.parent);
> > >>>
> > >>> And this is oops-prone. You need:
> > >>> - ensure hid_is_using_ll_driver(hdev, &usb_hid_driver) returns true.
> > >>> - add a dependency on USBHID in the KConfig now that you are checking
> > >>> on the USB transport layer.
> > >>>
> > >>> That being said, I would love instead:
> > >>> - to have a non USB version of this match, where you decide which
> > >>> component needs to be handled based on the report descriptor
> > >>
> > >> Actually your idea to use the desciptors is not bad, but since what
> > >> we really want is to not bind to the interface which is marked for the
> > >> hid-multitouch driver I just realized we can just check that.
> > >>
> > >> So how about:
> > >>
> > >> static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
> > >> {
> > >> if (ignore_special_driver)
> > >> return false;
> > >>
> > >> /*
> > >> * Some keyboard docks with an ITE 8595 keyboard/touchpad controller
> > >> * support the HID multitouch protocol for the touchpad, in that
> > >> * case the "Wireless Radio Control" bits which we care about are
> > >> * on the other interface; and we should not bind to the multitouch
> > >> * capable interface as that breaks multitouch support.
> > >> */
> > >> return hdev->group != HID_GROUP_MULTITOUCH_WIN_8;
> > >> }
> > >
> > > Yep, I like that very much :)
> >
> > Actually if we want to check the group and there are only 2 interfaces we do
> > not need to use the match callback at all, w e can simply match on the
> > group of the interface which we do want:
> >
> > diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
> > index db0f35be5a8b..21bd48f16033 100644
> > --- a/drivers/hid/hid-ite.c
> > +++ b/drivers/hid/hid-ite.c
> > @@ -56,8 +56,9 @@ static const struct hid_device_id ite_devices[] = {
> > { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
> > { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
> > /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
> > - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
> > - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
> > + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
> > + USB_VENDOR_ID_SYNAPTICS,
> > + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
> > { }
> > };
> > MODULE_DEVICE_TABLE(hid, ite_devices);
> >
> > Much cleaner
>
> yep
>
> > (and now I don't need to write a test, which is always
> > a good motivation to come up with a cleaner solution :)
>
> Hehe, too bad, you already picked up my curiosity on this one, and I
> really would like to see the report descriptors and some events of the
> keys that are fixed by hid-ite.c.
> <with a low voice>This will be a hard requirement to accept this patch </joke>.
>
> More seriously, Zdeněk, can you run hid-recorder from
> https://gitlab.freedesktop.org/libevdev/hid-tools/ and provide me the
> report descriptor for all of your ITE HID devices? I'll add the
> matching tests in hid-tools and be sure we do not regress in the
> future.
>
> >
> > Let me turn this into a proper patch and then I will send that to
> > Zdeněk (off-list) for him to test (note don't worry if you do
> > not have time to test this weekend, then I'll do it on Monday).
> >
> > Regards,
> >
> > Hans
> >
> > p.s.
> >
> > 1. My train is approaching Brussels (Fosdem) so my email response
> > time will soon become irregular.
>
> How dare you? :)
>
> >
> > 2. Benjamin will you be at Fosdem too ?
> >
>
> Unfortunately no. Already got my quota of meeting people for this year
> between Kernel Recipes in September, XDC in October and LCA last week.
> So I need to keep in a quiet environment for a little bit :)
>
> Cheers,
> Benjamin
>
sd->devnode is released after calling
v4l2_subdev_release. Therefore it should be set
to NULL so that the subdev won't hold a pointer
to a released object. This fixes a reference
after free bug in function
v4l2_device_unregister_subdev
Cc: stable(a)vger.kernel.org
Fixes: 0e43734d4c46e ("media: v4l2-subdev: add release() internal op")
Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld(a)collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel(a)collabora.com>
---
changes since v2:
- since this is a regresion fix, I added Fixes and Cc to stable tags,
- change the commit title and log to be more clear.
drivers/media/v4l2-core/v4l2-device.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/media/v4l2-core/v4l2-device.c b/drivers/media/v4l2-core/v4l2-device.c
index 63d6b147b21e..2b3595671d62 100644
--- a/drivers/media/v4l2-core/v4l2-device.c
+++ b/drivers/media/v4l2-core/v4l2-device.c
@@ -177,6 +177,7 @@ static void v4l2_subdev_release(struct v4l2_subdev *sd)
{
struct module *owner = !sd->owner_v4l2_dev ? sd->owner : NULL;
+ sd->devnode = NULL;
if (sd->internal_ops && sd->internal_ops->release)
sd->internal_ops->release(sd);
module_put(owner);
--
2.20.1
Commit 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard
dock") added the USB id for the Acer SW5-012's keyboard dock to the
hid-ite driver to fix the rfkill driver not working.
Most keyboard docks with an ITE 8595 keyboard/touchpad controller have the
"Wireless Radio Control" bits which need the special hid-ite driver on the
second USB interface (the mouse interface) and their touchpad only supports
mouse emulation, so using generic hid-input handling for anything but
the "Wireless Radio Control" bits is fine. On these devices we simply bind
to all USB interfaces.
But unlike other ITE8595 using keyboard docks, the Acer Aspire Switch 10
(SW5-012)'s touchpad not only does mouse emulation it also supports
HID-multitouch and all the keys including the "Wireless Radio Control"
bits have been moved to the first USB interface (the keyboard intf).
So we need hid-ite to handle the first (keyboard) USB interface and have
it NOT bind to the second (mouse) USB interface so that that can be
handled by hid-multitouch.c and we get proper multi-touch support.
This commit adds a match callback to hid-ite which makes it only
match the first USB interface when running on the Acer SW5-012,
fixing the regression to mouse-emulation mode introduced by adding the
keyboard dock USB id.
Note the match function only does the special only bind to the first
USB interface on the Acer SW5-012, on other devices the hid-ite driver
actually must bind to the second interface as that is where the
"Wireless Radio Control" bits are.
Cc: stable(a)vger.kernel.org
Fixes: 8f18eca9ebc5 ("HID: ite: Add USB id match for Acer SW5-012 keyboard dock")
Reported-by: Zdeněk Rampas <zdenda.rampas(a)gmail.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/hid/hid-ite.c | 34 ++++++++++++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
index c436e12feb23..69a4ddfd033d 100644
--- a/drivers/hid/hid-ite.c
+++ b/drivers/hid/hid-ite.c
@@ -8,9 +8,12 @@
#include <linux/input.h>
#include <linux/hid.h>
#include <linux/module.h>
+#include <linux/usb.h>
#include "hid-ids.h"
+#define ITE8595_KBD_USB_INTF 0
+
static int ite_event(struct hid_device *hdev, struct hid_field *field,
struct hid_usage *usage, __s32 value)
{
@@ -37,6 +40,36 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
return 0;
}
+static bool ite_match(struct hid_device *hdev, bool ignore_special_driver)
+{
+ struct usb_interface *intf;
+
+ if (ignore_special_driver)
+ return false;
+
+ /*
+ * Most keyboard docks with an ITE 8595 keyboard/touchpad controller
+ * have the "Wireless Radio Control" bits which need this special
+ * driver on the second USB interface (the mouse interface). On
+ * these devices we simply bind to all USB interfaces.
+ *
+ * The Acer Aspire Switch 10 (SW5-012) is special, its touchpad
+ * not only does mouse emulation it also supports HID-multitouch
+ * and all the keys including the "Wireless Radio Control" bits
+ * have been moved to the first USB interface (the keyboard intf).
+ *
+ * We want the hid-multitouch driver to bind to the touchpad, so on
+ * the Acer SW5-012 we should only bind to the keyboard USB intf.
+ */
+ if (hdev->bus != BUS_USB || hdev->vendor != USB_VENDOR_ID_SYNAPTICS ||
+ hdev->product != USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012)
+ return true;
+
+ intf = to_usb_interface(hdev->dev.parent);
+
+ return intf->altsetting->desc.bInterfaceNumber == ITE8595_KBD_USB_INTF;
+}
+
static const struct hid_device_id ite_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
{ HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
@@ -50,6 +83,7 @@ MODULE_DEVICE_TABLE(hid, ite_devices);
static struct hid_driver ite_driver = {
.name = "itetech",
.id_table = ite_devices,
+ .match = ite_match,
.event = ite_event,
};
module_hid_driver(ite_driver);
--
2.23.0
Some (somewhat older) laptops have a correct BGRT table, except that the
version field is 0 instead of 1.
This has been seen on several Ivy Bridge based Lenovo models.
For now the spec. only defines version 1, so it is reasonably safe to
assume that tables with a version of 0 really are version 1 too,
which is what this commit does so that the BGRT table will be accepted
by the kernel on laptop models with this issue.
Cc: stable(a)vger.kernel.org
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1791273
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
Changes in v2:
- Simply always accept version 0 everywhere as suggested by Ard
---
drivers/firmware/efi/efi-bgrt.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/efi-bgrt.c b/drivers/firmware/efi/efi-bgrt.c
index b07c17643210..6aafdb67dbca 100644
--- a/drivers/firmware/efi/efi-bgrt.c
+++ b/drivers/firmware/efi/efi-bgrt.c
@@ -42,7 +42,12 @@ void __init efi_bgrt_init(struct acpi_table_header *table)
return;
}
*bgrt = *(struct acpi_table_bgrt *)table;
- if (bgrt->version != 1) {
+ /*
+ * Only version 1 is defined but some older laptops (seen on Lenovo
+ * Ivy Bridge models) have a correct version 1 BGRT table with the
+ * version set to 0, so we accept version 0 and 1.
+ */
+ if (bgrt->version > 1) {
pr_notice("Ignoring BGRT: invalid version %u (expected 1)\n",
bgrt->version);
goto out;
--
2.23.0
These patches include few backported fixes for the 4.4 stable
tree.
I would appreciate if you could kindly consider including them in the
next release.
Ajay
---
[Changes from v2]:
Merged following changes from Vlastimil's series [1]:
- Added page_ref_count() in [Patch v3 5/8]
- Added missing refcount overflow checks on x86 and s390 [Patch v3 5/8]
- Added [Patch v3 8/8]
- Removed 7aef4172c795 i.e. [Patch v2 3/8]
[1] https://lore.kernel.org/stable/20191108093814.16032-1-vbabka@suse.cz/
---
[PATCH v3 1/8]:
Backporting of upstream commit f958d7b528b1:
mm: make page ref count overflow check tighter and more explicit
[PATCH v3 2/8]:
Backporting of upstream commit 88b1a17dfc3e:
mm: add 'try_get_page()' helper function
[PATCH v3 3/8]:
Backporting of upstream commit a3e328556d41:
mm, gup: remove broken VM_BUG_ON_PAGE compound check for hugepages
[PATCH v3 4/8]:
Backporting of upstream commit d63206ee32b6:
mm, gup: ensure real head page is ref-counted when using hugepages
[PATCH v3 5/8]:
Backporting of upstream commit 8fde12ca79af:
mm: prevent get_user_pages() from overflowing page refcount
[PATCH v3 6/8]:
Backporting of upstream commit 7bf2d1df8082:
pipe: add pipe_buf_get() helper
[PATCH v3 7/8]:
Backporting of upstream commit 15fab63e1e57:
fs: prevent page refcount overflow in pipe_buf_get
[PATCH v3 8/8]:
x86, mm, gup: prevent get_page() race with munmap in paravirt guest
Some (somewhat older) Lenovo laptops have a correct BGRT table, except
that the version field is 0 instead of 1.
Quickly after finding this out, even before submitting a first version of
this patch upstream, the list of DMI matches for affected models grew to
3 models (all Ivy Bridge based).
So rather then maintaining an ever growing list with DMI matches for
affected Lenovo models, this commit simply checks if the vendor is Lenovo
when the version is 0 and in that case accepts the out of spec version
working around the Lenovo firmware bug.
Cc: stable(a)vger.kernel.org
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1791273
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/firmware/efi/efi-bgrt.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/efi-bgrt.c b/drivers/firmware/efi/efi-bgrt.c
index b07c17643210..3a2d6d3a590b 100644
--- a/drivers/firmware/efi/efi-bgrt.c
+++ b/drivers/firmware/efi/efi-bgrt.c
@@ -15,6 +15,7 @@
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/efi-bgrt.h>
+#include <linux/dmi.h>
struct acpi_table_bgrt bgrt_tab;
size_t bgrt_image_size;
@@ -42,7 +43,12 @@ void __init efi_bgrt_init(struct acpi_table_header *table)
return;
}
*bgrt = *(struct acpi_table_bgrt *)table;
- if (bgrt->version != 1) {
+ /*
+ * Some older Lenovo laptops have a correct BGRT table, except that
+ * the version field is 0 instead of 1.
+ */
+ if (bgrt->version != 1 &&
+ !(bgrt->version == 0 && dmi_name_in_vendors("LENOVO"))) {
pr_notice("Ignoring BGRT: invalid version %u (expected 1)\n",
bgrt->version);
goto out;
--
2.24.1
This patch corrects the condition to kick the transfer without
giving back the requests when either request has remaining data
or when there are pending SGs. The && check was introduced during
spliting up the dwc3_gadget_ep_cleanup_completed_requests() function.
Fixes: f38e35dd84e2 ("usb: dwc3: gadget: split dwc3_gadget_ep_cleanup_completed_requests()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tejas Joglekar <joglekar(a)synopsys.com>
---
drivers/usb/dwc3/gadget.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 86dc1db788a9..e07159e06f9a 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2485,7 +2485,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
req->request.actual = req->request.length - req->remaining;
- if (!dwc3_gadget_ep_request_completed(req) &&
+ if (!dwc3_gadget_ep_request_completed(req) ||
req->num_pending_sgs) {
__dwc3_gadget_kick_transfer(dep);
goto out;
--
2.11.0
From: John Hubbard <jhubbard(a)nvidia.com>
Subject: media/v4l2-core: set pages dirty upon releasing DMA buffers
After DMA is complete, and the device and CPU caches are synchronized,
it's still required to mark the CPU pages as dirty, if the data was coming
from the device. However, this driver was just issuing a bare put_page()
call, without any set_page_dirty*() call.
Fix the problem, by calling set_page_dirty_lock() if the CPU pages were
potentially receiving data from the device.
Link: http://lkml.kernel.org/r/20200107224558.2362728-11-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard(a)nvidia.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Acked-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl>
Cc: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Cc: Alex Williamson <alex.williamson(a)redhat.com>
Cc: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
Cc: Björn Töpel <bjorn.topel(a)intel.com>
Cc: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Ira Weiny <ira.weiny(a)intel.com>
Cc: Jan Kara <jack(a)suse.cz>
Cc: Jason Gunthorpe <jgg(a)mellanox.com>
Cc: Jason Gunthorpe <jgg(a)ziepe.ca>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Jerome Glisse <jglisse(a)redhat.com>
Cc: Jonathan Corbet <corbet(a)lwn.net>
Cc: Kirill A. Shutemov <kirill(a)shutemov.name>
Cc: Leon Romanovsky <leonro(a)mellanox.com>
Cc: Mike Rapoport <rppt(a)linux.ibm.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/media/v4l2-core/videobuf-dma-sg.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c~media-v4l2-core-set-pages-dirty-upon-releasing-dma-buffers
+++ a/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -349,8 +349,11 @@ int videobuf_dma_free(struct videobuf_dm
BUG_ON(dma->sglen);
if (dma->pages) {
- for (i = 0; i < dma->nr_pages; i++)
+ for (i = 0; i < dma->nr_pages; i++) {
+ if (dma->direction == DMA_FROM_DEVICE)
+ set_page_dirty_lock(dma->pages[i]);
put_page(dma->pages[i]);
+ }
kfree(dma->pages);
dma->pages = NULL;
}
_
From: Yang Shi <yang.shi(a)linux.alibaba.com>
Subject: mm: move_pages: report the number of non-attempted pages
Since commit a49bd4d71637 ("mm, numa: rework do_pages_move"), the semantic
of move_pages() has changed to return the number of non-migrated pages if
they were result of a non-fatal reasons (usually a busy page). This was
an unintentional change that hasn't been noticed except for LTP tests
which checked for the documented behavior.
There are two ways to go around this change. We can even get back to the
original behavior and return -EAGAIN whenever migrate_pages is not able to
migrate pages due to non-fatal reasons. Another option would be to simply
continue with the changed semantic and extend move_pages documentation to
clarify that -errno is returned on an invalid input or when migration
simply cannot succeed (e.g. -ENOMEM, -EBUSY) or the number of pages that
couldn't have been migrated due to ephemeral reasons (e.g. page is pinned
or locked for other reasons).
This patch implements the second option because this behavior is in place
for some time without anybody complaining and possibly new users depending
on it. Also it allows to have a slightly easier error handling as the
caller knows that it is worth to retry when err > 0.
But since the new semantic would be aborted immediately if migration is
failed due to ephemeral reasons, need include the number of non-attempted
pages in the return value too.
Link: http://lkml.kernel.org/r/1580160527-109104-1-git-send-email-yang.shi@linux.…
Fixes: a49bd4d71637 ("mm, numa: rework do_pages_move")
Signed-off-by: Yang Shi <yang.shi(a)linux.alibaba.com>
Suggested-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Wei Yang <richardw.yang(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org> [4.17+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/migrate.c | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
--- a/mm/migrate.c~mm-move_pages-report-the-number-of-non-attempted-pages
+++ a/mm/migrate.c
@@ -1627,8 +1627,19 @@ static int do_pages_move(struct mm_struc
start = i;
} else if (node != current_node) {
err = do_move_pages_to_node(mm, &pagelist, current_node);
- if (err)
+ if (err) {
+ /*
+ * Positive err means the number of failed
+ * pages to migrate. Since we are going to
+ * abort and return the number of non-migrated
+ * pages, so need to incude the rest of the
+ * nr_pages that have not been attempted as
+ * well.
+ */
+ if (err > 0)
+ err += nr_pages - i - 1;
goto out;
+ }
err = store_status(status, start, current_node, i - start);
if (err)
goto out;
@@ -1659,8 +1670,11 @@ static int do_pages_move(struct mm_struc
goto out_flush;
err = do_move_pages_to_node(mm, &pagelist, current_node);
- if (err)
+ if (err) {
+ if (err > 0)
+ err += nr_pages - i - 1;
goto out;
+ }
if (i > start) {
err = store_status(status, start, current_node, i - start);
if (err)
@@ -1674,6 +1688,13 @@ out_flush:
/* Make sure we do not overwrite the existing error */
err1 = do_move_pages_to_node(mm, &pagelist, current_node);
+ /*
+ * Don't have to report non-attempted pages here since:
+ * - If the above loop is done gracefully all pages have been
+ * attempted.
+ * - If the above loop is aborted it means a fatal error
+ * happened, should return ret.
+ */
if (!err1)
err1 = store_status(status, start, current_node, i - start);
if (err >= 0)
_
From: Wei Yang <richardw.yang(a)linux.intel.com>
Subject: mm: thp: don't need care deferred split queue in memcg charge move path
If compound is true, this means it is a PMD mapped THP. Which implies
the page is not linked to any defer list. So the first code chunk will
not be executed.
Also with this reason, it would not be proper to add this page to a
defer list. So the second code chunk is not correct.
Based on this, we should remove the defer list related code.
[yang.shi(a)linux.alibaba.com: better patch title]
Link: http://lkml.kernel.org/r/20200117233836.3434-1-richardw.yang@linux.intel.com
Fixes: 87eaceb3faa5 ("mm: thp: make deferred split shrinker memcg aware")
Signed-off-by: Wei Yang <richardw.yang(a)linux.intel.com>
Suggested-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Acked-by: Yang Shi <yang.shi(a)linux.alibaba.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev(a)gmail.com>
Cc: <stable(a)vger.kernel.org> [5.4+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memcontrol.c | 18 ------------------
1 file changed, 18 deletions(-)
--- a/mm/memcontrol.c~mm-thp-remove-the-defer-list-related-code-since-this-will-not-happen
+++ a/mm/memcontrol.c
@@ -5340,14 +5340,6 @@ static int mem_cgroup_move_account(struc
__mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (compound && !list_empty(page_deferred_list(page))) {
- spin_lock(&from->deferred_split_queue.split_queue_lock);
- list_del_init(page_deferred_list(page));
- from->deferred_split_queue.split_queue_len--;
- spin_unlock(&from->deferred_split_queue.split_queue_lock);
- }
-#endif
/*
* It is safe to change page->mem_cgroup here because the page
* is referenced, charged, and isolated - we can't race with
@@ -5357,16 +5349,6 @@ static int mem_cgroup_move_account(struc
/* caller should have done css_get */
page->mem_cgroup = to;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (compound && list_empty(page_deferred_list(page))) {
- spin_lock(&to->deferred_split_queue.split_queue_lock);
- list_add_tail(page_deferred_list(page),
- &to->deferred_split_queue.split_queue);
- to->deferred_split_queue.split_queue_len++;
- spin_unlock(&to->deferred_split_queue.split_queue_lock);
- }
-#endif
-
spin_unlock_irqrestore(&from->move_lock, flags);
ret = 0;
_
From: Dan Williams <dan.j.williams(a)intel.com>
Subject: mm/memory_hotplug: fix remove_memory() lockdep splat
The daxctl unit test for the dax_kmem driver currently triggers the (false
positive) lockdep splat below. It results from the fact that
remove_memory_block_devices() is invoked under the mem_hotplug_lock()
causing lockdep entanglements with cpu_hotplug_lock() and sysfs (kernfs
active state tracking). It is a false positive because the sysfs
attribute path triggering the memory remove is not the same attribute path
associated with memory-block device.
sysfs_break_active_protection() is not applicable since there is no real
deadlock conflict, instead move memory-block device removal outside the
lock. The mem_hotplug_lock() is not needed to synchronize the
memory-block device removal vs the page online state, that is already
handled by lock_device_hotplug(). Specifically, lock_device_hotplug() is
sufficient to allow try_remove_memory() to check the offline state of the
memblocks and be assured that any in progress online attempts are flushed
/ blocked by kernfs_drain() / attribute removal.
The add_memory() path safely creates memblock devices under the
mem_hotplug_lock(). There is no kernfs active state synchronization in
the memblock device_register() path, so nothing to fix there.
This change is only possible thanks to the recent change that refactored
memory block device removal out of arch_remove_memory() (commit
4c4b7f9ba948 mm/memory_hotplug: remove memory block devices before
arch_remove_memory()), and David's due diligence tracking down the
guarantees afforded by kernfs_drain(). Not flagged for -stable since this
only impacts ongoing development and lockdep validation, not a runtime
issue.
======================================================
WARNING: possible circular locking dependency detected
5.5.0-rc3+ #230 Tainted: G OE
------------------------------------------------------
lt-daxctl/6459 is trying to acquire lock:
ffff99c7f0003510 (kn->count#241){++++}, at: kernfs_remove_by_name_ns+0x41/0x80
but task is already holding lock:
ffffffffa76a5450 (mem_hotplug_lock.rw_sem){++++}, at: percpu_down_write+0x20/0xe0
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (mem_hotplug_lock.rw_sem){++++}:
__lock_acquire+0x39c/0x790
lock_acquire+0xa2/0x1b0
get_online_mems+0x3e/0xb0
kmem_cache_create_usercopy+0x2e/0x260
kmem_cache_create+0x12/0x20
ptlock_cache_init+0x20/0x28
start_kernel+0x243/0x547
secondary_startup_64+0xb6/0xc0
-> #1 (cpu_hotplug_lock.rw_sem){++++}:
__lock_acquire+0x39c/0x790
lock_acquire+0xa2/0x1b0
cpus_read_lock+0x3e/0xb0
online_pages+0x37/0x300
memory_subsys_online+0x17d/0x1c0
device_online+0x60/0x80
state_store+0x65/0xd0
kernfs_fop_write+0xcf/0x1c0
vfs_write+0xdb/0x1d0
ksys_write+0x65/0xe0
do_syscall_64+0x5c/0xa0
entry_SYSCALL_64_after_hwframe+0x49/0xbe
-> #0 (kn->count#241){++++}:
check_prev_add+0x98/0xa40
validate_chain+0x576/0x860
__lock_acquire+0x39c/0x790
lock_acquire+0xa2/0x1b0
__kernfs_remove+0x25f/0x2e0
kernfs_remove_by_name_ns+0x41/0x80
remove_files.isra.0+0x30/0x70
sysfs_remove_group+0x3d/0x80
sysfs_remove_groups+0x29/0x40
device_remove_attrs+0x39/0x70
device_del+0x16a/0x3f0
device_unregister+0x16/0x60
remove_memory_block_devices+0x82/0xb0
try_remove_memory+0xb5/0x130
remove_memory+0x26/0x40
dev_dax_kmem_remove+0x44/0x6a [kmem]
device_release_driver_internal+0xe4/0x1c0
unbind_store+0xef/0x120
kernfs_fop_write+0xcf/0x1c0
vfs_write+0xdb/0x1d0
ksys_write+0x65/0xe0
do_syscall_64+0x5c/0xa0
entry_SYSCALL_64_after_hwframe+0x49/0xbe
other info that might help us debug this:
Chain exists of:
kn->count#241 --> cpu_hotplug_lock.rw_sem --> mem_hotplug_lock.rw_sem
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(mem_hotplug_lock.rw_sem);
lock(cpu_hotplug_lock.rw_sem);
lock(mem_hotplug_lock.rw_sem);
lock(kn->count#241);
*** DEADLOCK ***
No fixes tag as this has been a long standing issue that predated the
addition of kernfs lockdep annotations.
Link: http://lkml.kernel.org/r/157991441887.2763922.4770790047389427325.stgit@dwi…
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: Vishal Verma <vishal.l.verma(a)intel.com>
Cc: Pavel Tatashin <pasha.tatashin(a)soleen.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory_hotplug.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
--- a/mm/memory_hotplug.c~mm-memory_hotplug-fix-remove_memory-lockdep-splat
+++ a/mm/memory_hotplug.c
@@ -1764,8 +1764,6 @@ static int __ref try_remove_memory(int n
BUG_ON(check_hotplug_memory_range(start, size));
- mem_hotplug_begin();
-
/*
* All memory blocks must be offlined before removing memory. Check
* whether all memory blocks in question are offline and return error
@@ -1778,9 +1776,14 @@ static int __ref try_remove_memory(int n
/* remove memmap entry */
firmware_map_remove(start, start + size, "System RAM");
- /* remove memory block devices before removing memory */
+ /*
+ * Memory block device removal under the device_hotplug_lock is
+ * a barrier against racing online attempts.
+ */
remove_memory_block_devices(start, size);
+ mem_hotplug_begin();
+
arch_remove_memory(nid, start, size, NULL);
memblock_free(start, size);
memblock_remove(start, size);
_
From: Wei Yang <richardw.yang(a)linux.intel.com>
Subject: mm/migrate.c: also overwrite error when it is bigger than zero
If we get here after successfully adding page to list, err would be 1 to
indicate the page is queued in the list.
Current code has two problems:
* on success, 0 is not returned
* on error, if add_page_for_migratioin() return 1, and the following err1
from do_move_pages_to_node() is set, the err1 is not returned since err
is 1
And these behaviors break the user interface.
Link: http://lkml.kernel.org/r/20200119065753.21694-1-richardw.yang@linux.intel.c…
Fixes: e0153fc2c760 ("mm: move_pages: return valid node id in status if the page is already on the target node").
Signed-off-by: Wei Yang <richardw.yang(a)linux.intel.com>
Acked-by: Yang Shi <yang.shi(a)linux.alibaba.com>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/migrate.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/migrate.c~mm-migratec-also-overwrite-error-when-it-is-bigger-than-zero
+++ a/mm/migrate.c
@@ -1676,7 +1676,7 @@ out_flush:
err1 = do_move_pages_to_node(mm, &pagelist, current_node);
if (!err1)
err1 = store_status(status, start, current_node, i - start);
- if (!err)
+ if (err >= 0)
err = err1;
out:
return err;
_
From: Pingfan Liu <kernelfans(a)gmail.com>
Subject: mm/sparse.c: reset section's mem_map when fully deactivated
After commit ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug"),
when a mem section is fully deactivated, section_mem_map still records the
section's start pfn, which is not used any more and will be reassigned
during re-addition.
In analogy with alloc/free pattern, it is better to clear all fields of
section_mem_map.
Beside this, it breaks the user space tool "makedumpfile" [1], which makes
assumption that a hot-removed section has mem_map as NULL, instead of
checking directly against SECTION_MARKED_PRESENT bit. (makedumpfile will
be better to change the assumption, and need a patch)
The bug can be reproduced on IBM POWERVM by "drmgr -c mem -r -q 5" ,
trigger a crash, and save vmcore by makedumpfile
[1]: makedumpfile, commit e73016540293 ("[v1.6.7] Update version")
Link: http://lkml.kernel.org/r/1579487594-28889-1-git-send-email-kernelfans@gmail…
Signed-off-by: Pingfan Liu <kernelfans(a)gmail.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Baoquan He <bhe(a)redhat.com>
Cc: Qian Cai <cai(a)lca.pw>
Cc: Kazuhito Hagio <k-hagio(a)ab.jp.nec.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/sparse.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/sparse.c~mm-sparse-reset-sections-mem_map-when-fully-deactivated
+++ a/mm/sparse.c
@@ -789,7 +789,7 @@ static void section_deactivate(unsigned
ms->usage = NULL;
}
memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
- ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr);
+ ms->section_mem_map = (unsigned long)NULL;
}
if (section_is_early && memmap)
_
From: Dan Carpenter <dan.carpenter(a)oracle.com>
Subject: mm/mempolicy.c: fix out of bounds write in mpol_parse_str()
What we are trying to do is change the '=' character to a NUL terminator
and then at the end of the function we restore it back to an '='. The
problem is there are two error paths where we jump to the end of the
function before we have replaced the '=' with NUL. We end up putting the
'=' in the wrong place (possibly one element before the start of the
buffer).
Link: http://lkml.kernel.org/r/20200115055426.vdjwvry44nfug7yy@kili.mountain
Reported-by: syzbot+e64a13c5369a194d67df(a)syzkaller.appspotmail.com
Fixes: 095f1fc4ebf3 ("mempolicy: rework shmem mpol parsing and display")
Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Dmitry Vyukov <dvyukov(a)google.com>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Dan Carpenter <dan.carpenter(a)oracle.com>
Cc: Lee Schermerhorn <lee.schermerhorn(a)hp.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mempolicy.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
--- a/mm/mempolicy.c~mm-mempolicyc-fix-out-of-bounds-write-in-mpol_parse_str
+++ a/mm/mempolicy.c
@@ -2821,6 +2821,9 @@ int mpol_parse_str(char *str, struct mem
char *flags = strchr(str, '=');
int err = 1, mode;
+ if (flags)
+ *flags++ = '\0'; /* terminate mode string */
+
if (nodelist) {
/* NUL-terminate mode or flags string */
*nodelist++ = '\0';
@@ -2831,9 +2834,6 @@ int mpol_parse_str(char *str, struct mem
} else
nodes_clear(nodes);
- if (flags)
- *flags++ = '\0'; /* terminate mode string */
-
mode = match_string(policy_modes, MPOL_MAX, str);
if (mode < 0)
goto out;
_
From: "Theodore Ts'o" <tytso(a)mit.edu>
Subject: memcg: fix a crash in wb_workfn when a device disappears
Without memcg, there is a one-to-one mapping between the bdi and
bdi_writeback structures. In this world, things are fairly
straightforward; the first thing bdi_unregister() does is to shutdown the
bdi_writeback structure (or wb), and part of that writeback ensures that
no other work queued against the wb, and that the wb is fully drained.
With memcg, however, there is a one-to-many relationship between the bdi
and bdi_writeback structures; that is, there are multiple wb objects which
can all point to a single bdi. There is a refcount which prevents the bdi
object from being released (and hence, unregistered). So in theory, the
bdi_unregister() *should* only get called once its refcount goes to zero
(bdi_put will drop the refcount, and when it is zero, release_bdi gets
called, which calls bdi_unregister).
Unfortunately, del_gendisk() in block/gen_hd.c never got the memo about
the Brave New memcg World, and calls bdi_unregister directly. It does
this without informing the file system, or the memcg code, or anything
else. This causes the root wb associated with the bdi to be unregistered,
but none of the memcg-specific wb's are shutdown. So when one of these
wb's are woken up to do delayed work, they try to dereference their
wb->bdi->dev to fetch the device name, but unfortunately bdi->dev is now
NULL, thanks to the bdi_unregister() called by del_gendisk(). As a
result, *boom*.
Fortunately, it looks like the rest of the writeback path is perfectly
happy with bdi->dev and bdi->owner being NULL, so the simplest fix is to
create a bdi_dev_name() function which can handle bdi->dev being NULL.
This also allows us to bulletproof the writeback tracepoints to prevent
them from dereferencing a NULL pointer and crashing the kernel if one is
tracing with memcg's enabled, and an iSCSI device dies or a USB storage
stick is pulled.
The most common way of triggering this will be hotremoval of a device
while writeback with memcg enabled is going on. It was triggering several
times a day in a heavily loaded production environment.
Google Bug Id: 145475544
Link: https://lore.kernel.org/r/20191227194829.150110-1-tytso@mit.edu
Link: http://lkml.kernel.org/r/20191228005211.163952-1-tytso@mit.edu
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: Chris Mason <clm(a)fb.com>
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/fs-writeback.c | 2 -
include/linux/backing-dev.h | 10 +++++++
include/trace/events/writeback.h | 37 +++++++++++++----------------
mm/backing-dev.c | 1
4 files changed, 29 insertions(+), 21 deletions(-)
--- a/fs/fs-writeback.c~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/fs/fs-writeback.c
@@ -2063,7 +2063,7 @@ void wb_workfn(struct work_struct *work)
struct bdi_writeback, dwork);
long pages_written;
- set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
+ set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() ||
--- a/include/linux/backing-dev.h~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/include/linux/backing-dev.h
@@ -13,6 +13,7 @@
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
+#include <linux/device.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
@@ -504,4 +505,13 @@ static inline int bdi_rw_congested(struc
(1 << WB_async_congested));
}
+extern const char *bdi_unknown_name;
+
+static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
+{
+ if (!bdi || !bdi->dev)
+ return bdi_unknown_name;
+ return dev_name(bdi->dev);
+}
+
#endif /* _LINUX_BACKING_DEV_H */
--- a/include/trace/events/writeback.h~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/include/trace/events/writeback.h
@@ -67,8 +67,8 @@ DECLARE_EVENT_CLASS(writeback_page_templ
TP_fast_assign(
strscpy_pad(__entry->name,
- mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)",
- 32);
+ bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
+ NULL), 32);
__entry->ino = mapping ? mapping->host->i_ino : 0;
__entry->index = page->index;
),
@@ -111,8 +111,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inod
struct backing_dev_info *bdi = inode_to_bdi(inode);
/* may be called for files on pseudo FSes w/ unregistered bdi */
- strscpy_pad(__entry->name,
- bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->flags = flags;
@@ -193,7 +192,7 @@ TRACE_EVENT(inode_foreign_history,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(inode_to_bdi(inode)->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
__entry->history = history;
@@ -222,7 +221,7 @@ TRACE_EVENT(inode_switch_wbs,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(old_wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(old_wb->bdi), 32);
__entry->ino = inode->i_ino;
__entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb);
__entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb);
@@ -255,7 +254,7 @@ TRACE_EVENT(track_foreign_dirty,
struct address_space *mapping = page_mapping(page);
struct inode *inode = mapping ? mapping->host : NULL;
- strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->bdi_id = wb->bdi->id;
__entry->ino = inode ? inode->i_ino : 0;
__entry->memcg_id = wb->memcg_css->id;
@@ -288,7 +287,7 @@ TRACE_EVENT(flush_foreign,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ strncpy(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
__entry->frn_bdi_id = frn_bdi_id;
__entry->frn_memcg_id = frn_memcg_id;
@@ -318,7 +317,7 @@ DECLARE_EVENT_CLASS(writeback_write_inod
TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->sync_mode = wbc->sync_mode;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
@@ -361,9 +360,7 @@ DECLARE_EVENT_CLASS(writeback_work_class
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
- strscpy_pad(__entry->name,
- wb->bdi->dev ? dev_name(wb->bdi->dev) :
- "(unknown)", 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->nr_pages = work->nr_pages;
__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
__entry->sync_mode = work->sync_mode;
@@ -416,7 +413,7 @@ DECLARE_EVENT_CLASS(writeback_class,
__field(ino_t, cgroup_ino)
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: cgroup_ino=%lu",
@@ -438,7 +435,7 @@ TRACE_EVENT(writeback_bdi_register,
__array(char, name, 32)
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
),
TP_printk("bdi %s",
__entry->name
@@ -463,7 +460,7 @@ DECLARE_EVENT_CLASS(wbc_class,
),
TP_fast_assign(
- strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
__entry->sync_mode = wbc->sync_mode;
@@ -514,7 +511,7 @@ TRACE_EVENT(writeback_queue_io,
),
TP_fast_assign(
unsigned long *older_than_this = work->older_than_this;
- strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->older = older_than_this ? *older_than_this : 0;
__entry->age = older_than_this ?
(jiffies - *older_than_this) * 1000 / HZ : -1;
@@ -600,7 +597,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
),
TP_fast_assign(
- strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->write_bw = KBps(wb->write_bandwidth);
__entry->avg_write_bw = KBps(wb->avg_write_bandwidth);
__entry->dirty_rate = KBps(dirty_rate);
@@ -665,7 +662,7 @@ TRACE_EVENT(balance_dirty_pages,
TP_fast_assign(
unsigned long freerun = (thresh + bg_thresh) / 2;
- strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->limit = global_wb_domain.dirty_limit;
__entry->setpoint = (global_wb_domain.dirty_limit +
@@ -726,7 +723,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
@@ -800,7 +797,7 @@ DECLARE_EVENT_CLASS(writeback_single_ino
TP_fast_assign(
strscpy_pad(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
--- a/mm/backing-dev.c~memcg-fix-a-crash-in-wb_workfn-when-a-device-disappears
+++ a/mm/backing-dev.c
@@ -21,6 +21,7 @@ struct backing_dev_info noop_backing_dev
EXPORT_SYMBOL_GPL(noop_backing_dev_info);
static struct class *bdi_class;
+const char *bdi_unknown_name = "(unknown)";
/*
* bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
_
From: Brendan Higgins <brendanhiggins(a)google.com>
Currently CONFIG_FSI_MASTER_ASPEED=y implicitly depends on
CONFIG_HAS_IOMEM=y; consequently, on architectures without IOMEM we get
the following build error:
ld: drivers/fsi/fsi-master-aspeed.o: in function `fsi_master_aspeed_probe':
drivers/fsi/fsi-master-aspeed.c:436: undefined reference to `devm_ioremap_resource'
Fix the build error by adding the unspecified dependency.
Fixes: 606397d67f41 ("fsi: Add ast2600 master driver")
Cc: stable(a)vger.kernel.org
Reported-by: Brendan Higgins <brendanhiggins(a)google.com>
Signed-off-by: Brendan Higgins <brendanhiggins(a)google.com>
Reviewed-by: Joel Stanley <joel(a)jms.id.au>
Signed-off-by: Joel Stanley <joel(a)jms.id.au>
---
Greg, can you please pick this one up. Brendan has asked it be included
in 5.6.
drivers/fsi/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
index 92ce6d85802c..4cc0e630ab79 100644
--- a/drivers/fsi/Kconfig
+++ b/drivers/fsi/Kconfig
@@ -55,6 +55,7 @@ config FSI_MASTER_AST_CF
config FSI_MASTER_ASPEED
tristate "FSI ASPEED master"
+ depends on HAS_IOMEM
help
This option enables a FSI master that is present behind an OPB bridge
in the AST2600.
--
2.24.1
Hi Greg, Sasha,
Could you backport upstream commit
de19055564c8f8f9d366f8db3395836da0b2176c ("Documentation: Document arm64
kpti control") to the stable 4.9, 4.14 and 4.19 kernels since they all
support the command line parameter.
Thank you!
--
Florian
Hello,
We ran automated tests on a recent commit from this kernel tree:
Kernel repo: git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
Commit: 3099711749b2 - Linux 5.4.17-rc1
The results of these automated tests are provided below.
Overall result: PASSED
Merge: OK
Compile: OK
Tests: OK
All kernel binaries, config files, and logs are available for download here:
https://artifacts.cki-project.org/pipelines/411827
Please reply to this email if you have any questions about the tests that we
ran or if you have any suggestions on how to make future tests more effective.
,-. ,-.
( C ) ( K ) Continuous
`-',-.`-' Kernel
( I ) Integration
`-'
______________________________________________________________________________
Compile testing
---------------
We compiled the kernel for 3 architectures:
aarch64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
ppc64le:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
x86_64:
make options: -j30 INSTALL_MOD_STRIP=1 targz-pkg
Hardware testing
----------------
We booted each kernel and ran the following tests:
aarch64:
Host 1:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
ppc64le:
Host 1:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Host 2:
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ✅ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
x86_64:
Host 1:
✅ Boot test
✅ Storage SAN device stress - megaraid_sas
Host 2:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
✅ Boot test
✅ Podman system integration test (as root)
✅ Podman system integration test (as user)
✅ LTP
✅ Loopdev Sanity
✅ Memory function: memfd_create
✅ AMTU (Abstract Machine Test Utility)
✅ Networking bridge: sanity
✅ Ethernet drivers sanity
✅ Networking MACsec: sanity
✅ Networking socket: fuzz
✅ Networking sctp-auth: sockopts test
✅ Networking: igmp conformance test
✅ Networking route: pmtu
✅ Networking route_func: local
✅ Networking route_func: forward
✅ Networking TCP: keepalive test
✅ Networking UDP: socket
✅ Networking tunnel: geneve basic test
✅ Networking tunnel: gre basic
✅ L2TP basic test
✅ Networking tunnel: vxlan basic
✅ Networking ipsec: basic netns transport
✅ Networking ipsec: basic netns tunnel
✅ audit: audit testsuite test
✅ httpd: mod_ssl smoke sanity
✅ tuned: tune-processes-through-perf
✅ pciutils: sanity smoke test
✅ ALSA PCM loopback test
✅ ALSA Control (mixer) Userspace Element test
✅ storage: SCSI VPD
✅ trace: ftrace/tracer
🚧 ✅ CIFS Connectathon
🚧 ✅ POSIX pjd-fstest suites
🚧 ⚡⚡⚡ jvm test suite
🚧 ✅ Memory function: kaslr
🚧 ✅ LTP: openposix test suite
🚧 ✅ Networking vnic: ipvlan/basic
🚧 ✅ iotop: sanity
🚧 ✅ Usex - version 1.9-29
🚧 ✅ storage: dm/common
Host 3:
⚡ Internal infrastructure issues prevented one or more tests (marked
with ⚡⚡⚡) from running on this architecture.
This is not the fault of the kernel that was tested.
⚡⚡⚡ Boot test
⚡⚡⚡ xfstests: ext4
⚡⚡⚡ xfstests: xfs
⚡⚡⚡ selinux-policy: serge-testsuite
⚡⚡⚡ lvm thinp sanity
⚡⚡⚡ storage: software RAID testing
⚡⚡⚡ stress: stress-ng
🚧 ⚡⚡⚡ IOMMU boot test
🚧 ⚡⚡⚡ IPMI driver test
🚧 ⚡⚡⚡ IPMItool loop stress test
🚧 ⚡⚡⚡ power-management: cpupower/sanity test
🚧 ⚡⚡⚡ Storage blktests
Host 4:
✅ Boot test
✅ Storage SAN device stress - mpt3sas driver
Host 5:
✅ Boot test
✅ xfstests: ext4
✅ xfstests: xfs
✅ selinux-policy: serge-testsuite
✅ lvm thinp sanity
✅ storage: software RAID testing
✅ stress: stress-ng
🚧 ✅ IOMMU boot test
🚧 ✅ IPMI driver test
🚧 ✅ IPMItool loop stress test
🚧 ✅ Storage blktests
Test sources: https://github.com/CKI-project/tests-beaker
💚 Pull requests are welcome for new tests or improvements to existing tests!
Waived tests
------------
If the test run included waived tests, they are marked with 🚧. Such tests are
executed but their results are not taken into account. Tests are waived when
their results are not reliable enough, e.g. when they're just introduced or are
being fixed.
Testing timeout
---------------
We aim to provide a report within reasonable timeframe. Tests that haven't
finished running are marked with ⏱. Reports for non-upstream kernels have
a Beaker recipe linked to next to each host.
From: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
kvm_steal_time_set_preempted() may accidentally clear KVM_VCPU_FLUSH_TLB
bit if it is called more than once while VCPU is preempted.
This is part of CVE-2019-3016.
(This bug was also independently discovered by Jim Mattson
<jmattson(a)google.com>)
Signed-off-by: Boris Ostrovsky <boris.ostrovsky(a)oracle.com>
Reviewed-by: Joao Martins <joao.m.martins(a)oracle.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
arch/x86/kvm/x86.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf91713..8c93691 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3504,6 +3504,9 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
+ if (vcpu->arch.st.steal.preempted)
+ return;
+
vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
--
1.8.3.1