commit e1bf1687740ce1a3598a1c5e452b852ff2190682 upstream.
This reverts commit 870190a9ec9075205c0fa795a09fa931694a3ff1.
It was not a good idea. The custom hash table was a much better
fit for this purpose.
A fast lookup is not essential, in fact for most cases there is no lookup
at all because original tuple is not taken and can be used as-is.
What needs to be fast is insertion and deletion.
rhlist removal however requires a rhlist walk.
We can have thousands of entries in such a list if source port/addresses
are reused for multiple flows, if this happens removal requests are so
expensive that deletions of a few thousand flows can take several
seconds(!).
The advantages that we got from rhashtable are:
1) table auto-sizing
2) multiple locks
1) would be nice to have, but it is not essential as we have at
most one lookup per new flow, so even a million flows in the bysource
table are not a problem compared to current deletion cost.
2) is easy to add to custom hash table.
I tried to add hlist_node to rhlist to speed up rhltable_remove but this
isn't doable without changing semantics. rhltable_remove_fast will
check that the to-be-deleted object is part of the table and that
requires a list walk that we want to avoid.
Furthermore, using hlist_node increases size of struct rhlist_head, which
in turn increases nf_conn size.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=196821
Reported-by: Ivan Babrou <ibobrik(a)gmail.com>
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
---
Hi Greg,
this is a 4.9.y backport of the same revert that already went into 4.13.y;
please consider applying this.
Note: you need to 'git cherry-pick 6e699867f84c0f358fed233fe6162173aca28e04'
first, else this won't apply (and cause crash if newly allocated conntrack is
dropped right away).
include/net/netfilter/nf_conntrack.h | 3 +-
include/net/netfilter/nf_nat.h | 1 -
net/netfilter/nf_nat_core.c | 134 +++++++++++++++--------------------
3 files changed, 57 insertions(+), 81 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index d9d52c020a70..9ae819e27940 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -17,7 +17,6 @@
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/atomic.h>
-#include <linux/rhashtable.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/nf_conntrack_dccp.h>
@@ -101,7 +100,7 @@ struct nf_conn {
possible_net_t ct_net;
#if IS_ENABLED(CONFIG_NF_NAT)
- struct rhlist_head nat_bysource;
+ struct hlist_node nat_bysource;
#endif
/* all members below initialized via memset */
u8 __nfct_init_offset[0];
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index c327a431a6f3..02515f7ed4cc 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -1,6 +1,5 @@
#ifndef _NF_NAT_H
#define _NF_NAT_H
-#include <linux/rhashtable.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/nf_nat.h>
#include <net/netfilter/nf_conntrack_tuple.h>
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index e9fcb58d4eba..624d6e4dcd5c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,19 +30,17 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
+static DEFINE_SPINLOCK(nf_nat_lock);
+
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
-struct nf_nat_conn_key {
- const struct net *net;
- const struct nf_conntrack_tuple *tuple;
- const struct nf_conntrack_zone *zone;
-};
-
-static struct rhltable nf_nat_bysource_table;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
@@ -121,17 +119,19 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */
-static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
+/* We keep an extra hash for each conntrack, for fast searching. */
+static inline unsigned int
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
- const struct nf_conntrack_tuple *t;
- const struct nf_conn *ct = data;
+ unsigned int hash;
+
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
- t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* Original src, to ensure we map it consistently if poss. */
+ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- seed ^= net_hash_mix(nf_ct_net(ct));
- return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
- t->dst.protonum ^ seed);
+ return reciprocal_scale(hash, nf_nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
@@ -187,28 +187,6 @@ same_src(const struct nf_conn *ct,
t->src.u.all == tuple->src.u.all);
}
-static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
- const void *obj)
-{
- const struct nf_nat_conn_key *key = arg->key;
- const struct nf_conn *ct = obj;
-
- if (!same_src(ct, key->tuple) ||
- !net_eq(nf_ct_net(ct), key->net) ||
- !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL))
- return 1;
-
- return 0;
-}
-
-static struct rhashtable_params nf_nat_bysource_params = {
- .head_offset = offsetof(struct nf_conn, nat_bysource),
- .obj_hashfn = nf_nat_bysource_hash,
- .obj_cmpfn = nf_nat_bysource_cmp,
- .nelem_hint = 256,
- .min_size = 1024,
-};
-
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
@@ -219,26 +197,22 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
+ unsigned int h = hash_by_src(net, tuple);
const struct nf_conn *ct;
- struct nf_nat_conn_key key = {
- .net = net,
- .tuple = tuple,
- .zone = zone
- };
- struct rhlist_head *hl, *h;
-
- hl = rhltable_lookup(&nf_nat_bysource_table, &key,
- nf_nat_bysource_params);
- rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) {
- nf_ct_invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
-
- if (in_range(l3proto, l4proto, result, range))
- return 1;
+ hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
+ if (same_src(ct, tuple) &&
+ net_eq(net, nf_ct_net(ct)) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
+ /* Copy source part from reply tuple. */
+ nf_ct_invert_tuplepr(result,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
+
+ if (in_range(l3proto, l4proto, result, range))
+ return 1;
+ }
}
-
return 0;
}
@@ -411,6 +385,7 @@ nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
struct nf_conn_nat *nat;
@@ -452,19 +427,16 @@ nf_nat_setup_info(struct nf_conn *ct,
}
if (maniptype == NF_NAT_MANIP_SRC) {
- struct nf_nat_conn_key key = {
- .net = nf_ct_net(ct),
- .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- .zone = nf_ct_zone(ct),
- };
- int err;
-
- err = rhltable_insert_key(&nf_nat_bysource_table,
- &key,
- &ct->nat_bysource,
- nf_nat_bysource_params);
- if (err)
- return NF_DROP;
+ unsigned int srchash;
+
+ srchash = hash_by_src(net,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ spin_lock_bh(&nf_nat_lock);
+ /* nf_conntrack_alter_reply might re-allocate extension aera */
+ nat = nfct_nat(ct);
+ hlist_add_head_rcu(&ct->nat_bysource,
+ &nf_nat_bysource[srchash]);
+ spin_unlock_bh(&nf_nat_lock);
}
/* It's done. */
@@ -572,9 +544,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&ct->nat_bysource);
ct->status &= ~IPS_NAT_DONE_MASK;
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ spin_unlock_bh(&nf_nat_lock);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -699,9 +672,11 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
/* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
- if (ct->status & IPS_SRC_NAT_DONE)
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ if (ct->status & IPS_SRC_NAT_DONE) {
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&ct->nat_bysource);
+ spin_unlock_bh(&nf_nat_lock);
+ }
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -836,13 +811,16 @@ static int __init nf_nat_init(void)
{
int ret;
- ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
- if (ret)
- return ret;
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+
+ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+ if (!nf_nat_bysource)
+ return -ENOMEM;
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
- rhltable_destroy(&nf_nat_bysource_table);
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
@@ -866,7 +844,7 @@ static int __init nf_nat_init(void)
return 0;
cleanup_extend:
- rhltable_destroy(&nf_nat_bysource_table);
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
@@ -886,8 +864,8 @@ static void __exit nf_nat_cleanup(void)
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
-
- rhltable_destroy(&nf_nat_bysource_table);
+ synchronize_net();
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
}
MODULE_LICENSE("GPL");
--
2.13.6
This is an automatic generated email to let you know that the following patch were queued:
Subject: media: rc: sir_ir: detect presence of port
Author: Sean Young <sean(a)mess.org>
Date: Wed Nov 8 16:19:45 2017 -0500
Without this test, sir_ir clumsy claims resources for a device which
does not exist.
The 0-day kernel test robot reports the following errors (in a loop):
sir_ir sir_ir.0: Trapped in interrupt
genirq: Flags mismatch irq 4. 00000000 (ttyS0) vs. 00000000 (sir_ir)
When sir_ir is loaded with the default io and irq, the following happens:
- sir_ir claims irq 4
- user space opens /dev/ttyS0
- in serial8250_do_startup(), some setup is done for ttyS0, which causes
irq 4 to fire (in THRE test)
- sir_ir does not realise it was not for it, and spins until the "trapped
in interrupt"
- now serial driver calls setup_irq() and fails and we get the
"Flags mismatch" error.
There is no port present at 0x3e8 so simply check for the presence of a
port, as suggested by Linus.
Reported-by: kbuild test robot <fengguang.wu(a)intel.com>
Tested-by: Fengguang Wu <fengguang.wu(a)intel.com>
Signed-off-by: Sean Young <sean(a)mess.org>
Cc: <stable(a)vger.kernel.org> # 4.12+
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)s-opensource.com>
drivers/media/rc/sir_ir.c | 40 ++++++++++++++++++++++++++++++++++++----
1 file changed, 36 insertions(+), 4 deletions(-)
---
diff --git a/drivers/media/rc/sir_ir.c b/drivers/media/rc/sir_ir.c
index 76120664b700..9ee2c9196b4d 100644
--- a/drivers/media/rc/sir_ir.c
+++ b/drivers/media/rc/sir_ir.c
@@ -57,7 +57,7 @@ static void add_read_queue(int flag, unsigned long val);
static irqreturn_t sir_interrupt(int irq, void *dev_id);
static void send_space(unsigned long len);
static void send_pulse(unsigned long len);
-static void init_hardware(void);
+static int init_hardware(void);
static void drop_hardware(void);
/* Initialisation */
@@ -263,11 +263,36 @@ static void send_pulse(unsigned long len)
}
}
-static void init_hardware(void)
+static int init_hardware(void)
{
+ u8 scratch, scratch2, scratch3;
unsigned long flags;
spin_lock_irqsave(&hardware_lock, flags);
+
+ /*
+ * This is a simple port existence test, borrowed from the autoconfig
+ * function in drivers/tty/serial/8250/8250_port.c
+ */
+ scratch = sinp(UART_IER);
+ soutp(UART_IER, 0);
+#ifdef __i386__
+ outb(0xff, 0x080);
+#endif
+ scratch2 = sinp(UART_IER) & 0x0f;
+ soutp(UART_IER, 0x0f);
+#ifdef __i386__
+ outb(0x00, 0x080);
+#endif
+ scratch3 = sinp(UART_IER) & 0x0f;
+ soutp(UART_IER, scratch);
+ if (scratch2 != 0 || scratch3 != 0x0f) {
+ /* we fail, there's nothing here */
+ spin_unlock_irqrestore(&hardware_lock, flags);
+ pr_err("port existence test failed, cannot continue\n");
+ return -ENODEV;
+ }
+
/* reset UART */
outb(0, io + UART_MCR);
outb(0, io + UART_IER);
@@ -285,6 +310,8 @@ static void init_hardware(void)
/* turn on UART */
outb(UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2, io + UART_MCR);
spin_unlock_irqrestore(&hardware_lock, flags);
+
+ return 0;
}
static void drop_hardware(void)
@@ -334,14 +361,19 @@ static int sir_ir_probe(struct platform_device *dev)
pr_err("IRQ %d already in use.\n", irq);
return retval;
}
+
+ retval = init_hardware();
+ if (retval) {
+ del_timer_sync(&timerlist);
+ return retval;
+ }
+
pr_info("I/O port 0x%.4x, IRQ %d.\n", io, irq);
retval = devm_rc_register_device(&sir_ir_dev->dev, rcdev);
if (retval < 0)
return retval;
- init_hardware();
-
return 0;
}
Hi Mark,
On 15-11-2017 10:54, Mark Brown wrote:
> On Wed, Nov 15, 2017 at 02:45:45AM +0000, alexander.levin(a)verizon.com wrote:
>
>> We can no longer rely on the return value of
>> devm_snd_dmaengine_pcm_register(...) to check if the DMA
>> handle is declared in the DT.
>> Previously this check activated PIO mode but currently
>> dma_request_chan returns either a valid channel or -EPROBE_DEFER.
> So when did the corresponding change to the dmaengine API go in?
Looks like this was introduced in a8135d0d79e9 ("dmaengine: core:
Introduce new, universal API to request a channel") but looking
at soc-generic-dmaengine-pcm.c I see that this would never work
anyway because dmaengine_pcm_request_chan_of() also returns
either zero or -EPROBE_DEFER.
>
>> In order to activate PIO mode check instead if the interrupt
>> line is declared. This reflects better what is documented in
>> the DT bindings (see Documentation/devicetree/bindings/sound/
>> designware-i2s.txt).
>> Also, initialize use_pio variable which was never being set
>> causing PIO mode to never work.
> Though if PIO mode never worked presumably this isn't that urgent...
Yeah, and we also have this. Its my fault, I was using a
different tree for testing at the time. But if we could backport
this it would be nice because we and our clients use this PIO
driver in ARC AXS101 Development Platform. (A side note is that
the DT bindings were only recently introduced but I think its
more easier to add the bindings instead of fixing this driver ...).
Best Regards,
Jose Miguel Abreu
The snd_usb_copy_string_desc() retrieves the usb string corresponding to
the index number thought the usb_string(). And for NULL-terminated, insert
'0' by using the return value of usb_string() as the index of buffer to
hold the string.
The problem is that the usb_string() also returns the length of the string
read(>= 0), but it can also return a negative value, the error or status
value of usb_control_msg(). If iClockSource is '0' as shown below,
usb_string() will return -EINVAL. This will result in '0' being inserted
into buf[-22], and the following KASAN out-of-bound error message will be
output.
AudioControl Interface Descriptor:
bLength 8
bDescriptorType 36
bDescriptorSubtype 10 (CLOCK_SOURCE)
bClockID 1
bmAttributes 0x07 Internal programmable Clock (synced to SOF)
bmControls 0x07
Clock Frequency Control (read/write)
Clock Validity Control (read-only)
bAssocTerminal 0
iClockSource 0
To fix out-of-bound error, insert 0 only if the return value of
usb_string() is greater than 0.
==================================================================
BUG: KASAN: stack-out-of-bounds in parse_audio_unit+0x1327/0x1960 [snd_usb_audio]
Write of size 1 at addr ffff88007e66735a by task systemd-udevd/18376
CPU: 0 PID: 18376 Comm: systemd-udevd Not tainted 4.13.0+ #3
Hardware name: LG Electronics 15N540-RFLGL/White Tip Mountain, BIOS 15N5
Call Trace:
dump_stack+0x63/0x8d
print_address_description+0x70/0x290
? parse_audio_unit+0x1327/0x1960 [snd_usb_audio]
kasan_report+0x265/0x350
__asan_store1+0x4a/0x50
parse_audio_unit+0x1327/0x1960 [snd_usb_audio]
? save_stack+0xb5/0xd0
? save_stack_trace+0x1b/0x20
? save_stack+0x46/0xd0
? kasan_kmalloc+0xad/0xe0
? kmem_cache_alloc_trace+0xff/0x230
? snd_usb_create_mixer+0xb0/0x4b0 [snd_usb_audio]
? usb_audio_probe+0x4de/0xf40 [snd_usb_audio]
? usb_probe_interface+0x1f5/0x440
? driver_probe_device+0x3ed/0x660
? build_feature_ctl+0xb10/0xb10 [snd_usb_audio]
? save_stack_trace+0x1b/0x20
? init_object+0x69/0xa0
? snd_usb_find_csint_desc+0xa8/0xf0 [snd_usb_audio]
snd_usb_mixer_controls+0x1dc/0x370 [snd_usb_audio]
? build_audio_procunit+0x890/0x890 [snd_usb_audio]
? snd_usb_create_mixer+0xb0/0x4b0 [snd_usb_audio]
? kmem_cache_alloc_trace+0xff/0x230
? usb_ifnum_to_if+0xbd/0xf0
snd_usb_create_mixer+0x25b/0x4b0 [snd_usb_audio]
? snd_usb_create_stream+0x255/0x2c0 [snd_usb_audio]
usb_audio_probe+0x4de/0xf40 [snd_usb_audio]
? snd_usb_autosuspend.part.7+0x30/0x30 [snd_usb_audio]
? __pm_runtime_idle+0x90/0x90
? kernfs_activate+0xa6/0xc0
? usb_match_one_id_intf+0xdc/0x130
? __pm_runtime_set_status+0x2d4/0x450
usb_probe_interface+0x1f5/0x440
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Jaejoong Kim <climbbb.kim(a)gmail.com>
---
The AudioControl Interface Descriptor in commit message is from
lsusb output with real usb audio DAC.
The usb audio product causing the OOB are as follows:
http://www.lg.com/uk/lg-friends/lg-AFD-1200
It only prints OOB error and usb audio works well. :)
sound/usb/mixer.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index e630813..5a83c2c 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -204,7 +204,8 @@ static int snd_usb_copy_string_desc(struct mixer_build *state,
int index, char *buf, int maxlen)
{
int len = usb_string(state->chip->dev, index, buf, maxlen - 1);
- buf[len] = 0;
+ if (len > 0)
+ buf[len] = 0;
return len;
}
--
2.7.4