Dear All,
When running dhcp tests using the 4.4.y (and 4.4.y-cip kernel as well), I encountered an issue where the dhcp client in the kernel could not get an IP address when multiple network devices were enabled. It seems that the current implementation of the dhcp client in the 4.4 kernel is send dhcp request via device 1 -> wait <1s for response from server on device 1 -> if no response, switch to device 2 -> repeat process on device 2 ...etc. When the dhcp server is slow to respond, this means it is impossible to get a dhcp address.
This series backported from upstream fixes the issue, is it possible to apply this to 4.4.y and/or 4.4.y-cip?
Thanks, Patryk
Geert Uytterhoeven (1): net: ipconfig: Fix NULL pointer dereference on RARP/BOOTP/DHCP timeout
Thierry Reding (1): net: ipconfig: Fix more use after free
Uwe Kleine-König (3): net: ipconfig: Support using "delayed" DHCP replies net: ipconfig: drop inter-device timeout net: ipconfig: fix use after free
net/ipv4/ipconfig.c | 61 ++++++++++++++++++++++++----------------------------- 1 file changed, 28 insertions(+), 33 deletions(-)
From: Uwe Kleine-König u.kleine-koenig@pengutronix.de
commit 2647cffb2bc6fbed163d377390eb7ca552c7c1cb upstream.
The dhcp code only waits 1s between sending DHCP requests on different devices and only accepts an answer for the device that sent out the last request. Only the timeout at the end of a loop is increased iteratively which favours only the last device. This makes it impossible to work with a dhcp server that takes little more than 1s connected to a device that is not the last one.
Instead of also increasing the inter-device timeout, teach the code to handle delayed replies.
To accomplish that, make *ic_dev track the current ic_device instead of the current net_device and adapt all users accordingly. The relevant change then is to reset d to ic_dev on a reply to assert that the followup request goes through the right device.
Signed-off-by: Uwe Kleine-König u.kleine-koenig@pengutronix.de Signed-off-by: David S. Miller davem@davemloft.net [Patryk: Backported to 4.4: replaced DBG function with pr_debug] Signed-off-by: Patryk Mungai patryk.mungai-ndungu.kx@renesas.com --- net/ipv4/ipconfig.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 60f564d..f326433 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -195,7 +195,7 @@ struct ic_device { };
static struct ic_device *ic_first_dev __initdata; /* List of open device */ -static struct net_device *ic_dev __initdata; /* Selected device */ +static struct ic_device *ic_dev __initdata; /* Selected device */
static bool __init ic_is_init_dev(struct net_device *dev) { @@ -314,8 +314,8 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev && !netdev_uses_dsa(dev)) { - DBG(("IP-Config: Downing %s\n", dev->name)); + if (dev != ic_dev->dev && !netdev_uses_dsa(dev)) { + pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } kfree(d); @@ -379,7 +379,7 @@ static int __init ic_setup_if(void) int err;
memset(&ir, 0, sizeof(ir)); - strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name); + strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name); set_sockaddr(sin, ic_myaddr, 0); if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface address (%d)\n", @@ -403,7 +403,7 @@ static int __init ic_setup_if(void) * out, we'll try to muddle along. */ if (ic_dev_mtu != 0) { - strcpy(ir.ifr_name, ic_dev->name); + strcpy(ir.ifr_name, ic_dev->dev->name); ir.ifr_mtu = ic_dev_mtu; if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0) pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n", @@ -574,7 +574,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt goto drop_unlock;
/* We have a winner! */ - ic_dev = dev; + ic_dev = d; if (ic_myaddr == NONE) ic_myaddr = tip; ic_servaddr = sip; @@ -661,8 +661,6 @@ static struct packet_type bootp_packet_type __initdata = { .func = ic_bootp_recv, };
-static __be32 ic_dev_xid; /* Device under configuration */ - /* * Initialize DHCP/BOOTP extension fields in the request. */ @@ -1052,12 +1050,6 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str goto drop_unlock; }
- /* Is it a reply for the device we are configuring? */ - if (b->xid != ic_dev_xid) { - net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n"); - goto drop_unlock; - } - /* Parse extensions */ if (ext_len >= 4 && !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ @@ -1148,7 +1140,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str }
/* We have a winner! */ - ic_dev = dev; + ic_dev = d; ic_myaddr = b->your_ip; ic_servaddr = b->server_ip; ic_addrservaddr = b->iph.saddr; @@ -1243,9 +1235,6 @@ static int __init ic_dynamic(void) timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); for (;;) { #ifdef IPCONFIG_BOOTP - /* Track the device we are configuring */ - ic_dev_xid = d->xid; - if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); #endif @@ -1263,6 +1252,8 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; + /* continue on device that got the reply */ + d = ic_dev; pr_cont(","); continue; } @@ -1513,7 +1504,7 @@ static int __init ip_auto_config(void) #endif /* IPCONFIG_DYNAMIC */ } else { /* Device selected manually or only one device -> use it */ - ic_dev = ic_first_dev->dev; + ic_dev = ic_first_dev; }
addr = root_nfs_parse_addr(root_server_path); @@ -1548,7 +1539,7 @@ static int __init ip_auto_config(void) pr_info("IP-Config: Complete:\n");
pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n", - ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr, + ic_dev->dev->name, ic_dev->dev->addr_len, ic_dev->dev->dev_addr, &ic_myaddr, &ic_netmask, &ic_gateway); pr_info(" host=%s, domain=%s, nis-domain=%s\n", utsname()->nodename, ic_domain, utsname()->domainname);
From: Uwe Kleine-König u.kleine-koenig@pengutronix.de
commit e068853409aa17208e94f4ca628005cc6f51e043 upstream.
Now that ipconfig learned to handle "delayed replies" in the previous commit, there is no reason any more to delay sending a first request per device.
Signed-off-by: Uwe Kleine-König u.kleine-koenig@pengutronix.de Signed-off-by: David S. Miller davem@davemloft.net [Patryk: cherry-picked to 4.4] Signed-off-by: Patryk Mungai patryk.mungai-ndungu.kx@renesas.com --- net/ipv4/ipconfig.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index f326433..35c8b35 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -94,7 +94,6 @@ /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ #define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ #define CONF_SEND_RETRIES 6 /* Send six requests per open */ -#define CONF_INTER_TIMEOUT (HZ) /* Inter-device timeout: 1 second */ #define CONF_BASE_TIMEOUT (HZ*2) /* Initial timeout: 2 seconds */ #define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */ #define CONF_TIMEOUT_MULT *7/4 /* Rate of timeout growth */ @@ -1243,9 +1242,11 @@ static int __init ic_dynamic(void) ic_rarp_send_if(d); #endif
- jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout); - while (time_before(jiffies, jiff) && !ic_got_reply) - schedule_timeout_uninterruptible(1); + if (!d->next) { + jiff = jiffies + timeout; + while (time_before(jiffies, jiff) && !ic_got_reply) + schedule_timeout_uninterruptible(1); + } #ifdef IPCONFIG_DHCP /* DHCP isn't done until we get a DHCPACK. */ if ((ic_got_reply & IC_BOOTP) &&
From: Uwe Kleine-König u.kleine-koenig@pengutronix.de
commit 9c706a49d660653625d206f6972541c1f60ea2b0 upstream.
ic_close_devs() calls kfree() for all devices's ic_device. Since commit 2647cffb2bc6 ("net: ipconfig: Support using "delayed" DHCP replies") the active device's ic_device is still used however to print the ipconfig summary which results in an oops if the memory is already changed. So delay freeing until after the autoconfig results are reported.
Fixes: 2647cffb2bc6 ("net: ipconfig: Support using "delayed" DHCP replies") Reported-by: Geert Uytterhoeven geert@linux-m68k.org Signed-off-by: Uwe Kleine-König u.kleine-koenig@pengutronix.de Tested-by: Geert Uytterhoeven geert+renesas@glider.be Signed-off-by: David S. Miller davem@davemloft.net [Patryk: cherry-picked to 4.4] Signed-off-by: Patryk Mungai patryk.mungai-ndungu.kx@renesas.com --- net/ipv4/ipconfig.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 35c8b35..23c9b8a 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1519,14 +1519,6 @@ static int __init ip_auto_config(void) return -1;
/* - * Close all network devices except the device we've - * autoconfigured and set up routes. - */ - ic_close_devs(); - if (ic_setup_if() < 0 || ic_setup_routes() < 0) - return -1; - - /* * Record which protocol was actually used. */ #ifdef IPCONFIG_DYNAMIC @@ -1560,6 +1552,15 @@ static int __init ip_auto_config(void) pr_cont("\n"); #endif /* !SILENT */
+ /* + * Close all network devices except the device we've + * autoconfigured and set up routes. + */ + ic_close_devs(); + if (ic_setup_if() < 0 || ic_setup_routes() < 0) + return -1; + + return 0; }
From: Thierry Reding treding@nvidia.com
commit d2d371ae5dd6af9a6a3d7f50b753627c42868409 upstream.
While commit 9c706a49d660 ("net: ipconfig: fix use after free") avoids the use after free, the resulting code still ends up calling both the ic_setup_if() and ic_setup_routes() after calling ic_close_devs(), and access to the device is still required.
Move the call to ic_close_devs() to the very end of the function.
Signed-off-by: Thierry Reding treding@nvidia.com Signed-off-by: David S. Miller davem@davemloft.net [Patryk: cherry-picked to 4.4] Signed-off-by: Patryk Mungai patryk.mungai-ndungu.kx@renesas.com --- net/ipv4/ipconfig.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 23c9b8a..6b78590 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1556,12 +1556,14 @@ static int __init ip_auto_config(void) * Close all network devices except the device we've * autoconfigured and set up routes. */ - ic_close_devs(); if (ic_setup_if() < 0 || ic_setup_routes() < 0) - return -1; + err = -1; + else + err = 0;
+ ic_close_devs();
- return 0; + return err; }
late_initcall(ip_auto_config);
From: Geert Uytterhoeven geert+renesas@glider.be
commit 1ae292a2457cd692828da2be87cb967260993ad0 upstream.
If no RARP, BOOTP, or DHCP response is received, ic_dev is never set, causing a NULL pointer dereference in ic_close_devs():
Sending DHCP requests ...... timed out! Unable to handle kernel NULL pointer dereference at virtual address 00000004
To fix this, add a check to avoid dereferencing ic_dev if it is still NULL.
Signed-off-by: Geert Uytterhoeven geert+renesas@glider.be Fixes: 2647cffb2bc6fbed ("net: ipconfig: Support using "delayed" DHCP replies") Signed-off-by: David S. Miller davem@davemloft.net [Patryk: cherry-picked to 4.4] Signed-off-by: Patryk Mungai patryk.mungai-ndungu.kx@renesas.com --- net/ipv4/ipconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 6b78590..ca658af 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -313,7 +313,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev->dev && !netdev_uses_dsa(dev)) { + if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); }
Hi!
When running dhcp tests using the 4.4.y (and 4.4.y-cip kernel as well), I encountered an issue where the dhcp client in the kernel could not get an IP address when multiple network devices were enabled. It seems that the current implementation of the dhcp client in the 4.4 kernel is send dhcp request via device 1 -> wait <1s for response from server on device 1 -> if no response, switch to device 2 -> repeat process on device 2 ...etc. When the dhcp server is slow to respond, this means it is impossible to get a dhcp address.
This series backported from upstream fixes the issue, is it possible to apply this to 4.4.y and/or 4.4.y-cip?
Ok, so first patch adds support for using "delayed" DHCP replies, then there are three more patches to fix up issues it creates.
Which tells me that maybe this is not quite suitable for -stable.
How long do your dhcp servers take to reply? Can you solve the problem some other way, like for example increasing timeouts?
Thanks, Pavel
Hello Pavel,
-----Original Message----- From: stable-owner@vger.kernel.org stable-owner@vger.kernel.org On Behalf Of Pavel Machek Sent: 06 April 2019 11:39 To: Patryk Mungai Ndungu patryk.mungai-ndungu.kx@renesas.com Cc: stable@vger.kernel.org; davem@davemloft.net; cip-dev@lists.cip- project.org Subject: Re: [cip-dev] [PATCH 4.4 0/5] DHCP client support when receiving "delayed" replies
Hi!
When running dhcp tests using the 4.4.y (and 4.4.y-cip kernel as well), I encountered an issue where the dhcp client in the kernel could not get an IP address when multiple network devices were enabled. It seems that the current implementation of the dhcp client in the 4.4 kernel is send dhcp request via device 1 -> wait <1s for response from server on device 1 -> if no response, switch to device 2 -> repeat process on device 2 ...etc. When the dhcp server is slow to respond, this means it is impossible to get a dhcp address.
This series backported from upstream fixes the issue, is it possible to apply this to 4.4.y and/or 4.4.y-cip?
Ok, so first patch adds support for using "delayed" DHCP replies, then there are three more patches to fix up issues it creates.
Which tells me that maybe this is not quite suitable for -stable.
How long do your dhcp servers take to reply?
It varies: the fastest reply I've seen is 0.12931s, the slowest is in the region is just over 1.007s. In the tests I've run, I measured the time between the kernel sending out the DHCP request and receiving a DHCP offer from the server. After running 50 tests, in around half of them, it takes just over 1s for the DHCP offer to arrive. But this is rarely over 1s +1 jiffie, hence why I think the code is able to cope with it most of the time.
However, the DHCP servers network is not at all loaded (at most only has 4 devices trying to connect to it at once), and yet I've seen this failure multiple times, so I'm not sure what would happen in a loaded network. I think at least for CIP we need a kernel that is able to cope with however long the server takes to reply.
Can you solve the problem some other way, like for example increasing timeouts?
I've tried increasing CONF_INTER_TIMEOUT to 2Hz and I haven't seen it fail in 50 boots. Though this is an simple workaround, it can prolong boot up time and the DHCP client is still time dependent with regards to listening for a reply on a network device.
Thanks, Patryk
Thanks, Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On Fri, Apr 05, 2019 at 11:47:31AM +0100, Patryk Mungai wrote:
Dear All,
When running dhcp tests using the 4.4.y (and 4.4.y-cip kernel as well), I encountered an issue where the dhcp client in the kernel could not get an IP address when multiple network devices were enabled. It seems that the current implementation of the dhcp client in the 4.4 kernel is send dhcp request via device 1 -> wait <1s for response from server on device 1 -> if no response, switch to device 2 -> repeat process on device 2 ...etc. When the dhcp server is slow to respond, this means it is impossible to get a dhcp address.
This series backported from upstream fixes the issue, is it possible to apply this to 4.4.y and/or 4.4.y-cip?
This is something that seems more suited for the CIP kernel as it adds new functionality.
-- Thanks, Sasha
On Sat, Apr 06, 2019 at 09:53:50AM -0400, Sasha Levin wrote:
On Fri, Apr 05, 2019 at 11:47:31AM +0100, Patryk Mungai wrote:
Dear All,
When running dhcp tests using the 4.4.y (and 4.4.y-cip kernel as well), I encountered an issue where the dhcp client in the kernel could not get an IP address when multiple network devices were enabled. It seems that the current implementation of the dhcp client in the 4.4 kernel is send dhcp request via device 1 -> wait <1s for response from server on device 1 -> if no response, switch to device 2 -> repeat process on device 2 ...etc. When the dhcp server is slow to respond, this means it is impossible to get a dhcp address.
This series backported from upstream fixes the issue, is it possible to apply this to 4.4.y and/or 4.4.y-cip?
This is something that seems more suited for the CIP kernel as it adds new functionality.
I agree, this is a new feature. If someone wants this, they should use a newer kernel please. Like 4.19.y :)
thanks,
greg k-h
linux-stable-mirror@lists.linaro.org