lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <9D9B7DF1-3F32-4277-BFC7-CDD155B6869B@gmail.com>
Date:   Wed, 9 Dec 2020 19:29:28 +0200
From:   Martin Zaharinov <micron10@...il.com>
To:     Guillaume Nault <gnault@...hat.com>
Cc:     "linux-kernel@...r kernel. org" <linux-kernel@...r.kernel.org>,
        Eric Dumazet <eric.dumazet@...il.com>, netdev@...r.kernel.org
Subject: Re: Urgent: BUG: PPP ioctl Transport endpoint is not connected

I made a diff between the Linux 4.14.211 and 4.15 kernels.

The changes are:

atomic_inc is replaced by refcount_inc,

and in another part of ppp_generic.c the kfree_skb() calls are removed.



You can see the diff below:


--- linux-4.14.211/drivers/net/ppp/ppp_generic.c	2020-12-08 09:17:35.000000000 +0000
+++ linux-4.15/drivers/net/ppp/ppp_generic.c	2018-01-28 21:20:33.000000000 +0000
@@ -51,6 +51,7 @@
 #include <asm/unaligned.h>
 #include <net/slhc_vj.h>
 #include <linux/atomic.h>
+#include <linux/refcount.h>

 #include <linux/nsproxy.h>
 #include <net/net_namespace.h>
@@ -84,7 +85,7 @@ struct ppp_file {
 	struct sk_buff_head xq;		/* pppd transmit queue */
 	struct sk_buff_head rq;		/* receive queue for pppd */
 	wait_queue_head_t rwait;	/* for poll on reading /dev/ppp */
-	atomic_t	refcnt;		/* # refs (incl /dev/ppp attached) */
+	refcount_t	refcnt;		/* # refs (incl /dev/ppp attached) */
 	int		hdrlen;		/* space to leave for headers */
 	int		index;		/* interface unit / channel number */
 	int		dead;		/* unit/channel has been shut down */
@@ -256,7 +257,7 @@ struct ppp_net {
 /* Prototypes. */
 static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
 			struct file *file, unsigned int cmd, unsigned long arg);
-static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb);
+static void ppp_xmit_process(struct ppp *ppp);
 static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
 static void ppp_push(struct ppp *ppp);
 static void ppp_channel_push(struct channel *pch);
@@ -389,7 +390,7 @@ static int ppp_open(struct inode *inode,
 	/*
 	 * This could (should?) be enforced by the permissions on /dev/ppp.
 	 */
-	if (!capable(CAP_NET_ADMIN))
+	if (!ns_capable(file->f_cred->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -408,7 +409,7 @@ static int ppp_release(struct inode *unu
 				unregister_netdevice(ppp->dev);
 			rtnl_unlock();
 		}
-		if (atomic_dec_and_test(&pf->refcnt)) {
+		if (refcount_dec_and_test(&pf->refcnt)) {
 			switch (pf->kind) {
 			case INTERFACE:
 				ppp_destroy_interface(PF_TO_PPP(pf));
@@ -512,12 +513,13 @@ static ssize_t ppp_write(struct file *fi
 		goto out;
 	}

+	skb_queue_tail(&pf->xq, skb);
+
 	switch (pf->kind) {
 	case INTERFACE:
-		ppp_xmit_process(PF_TO_PPP(pf), skb);
+		ppp_xmit_process(PF_TO_PPP(pf));
 		break;
 	case CHANNEL:
-		skb_queue_tail(&pf->xq, skb);
 		ppp_channel_push(PF_TO_CHANNEL(pf));
 		break;
 	}
@@ -880,7 +882,7 @@ static int ppp_unattached_ioctl(struct n
 		mutex_lock(&pn->all_ppp_mutex);
 		ppp = ppp_find_unit(pn, unit);
 		if (ppp) {
-			atomic_inc(&ppp->file.refcnt);
+			refcount_inc(&ppp->file.refcnt);
 			file->private_data = &ppp->file;
 			err = 0;
 		}
@@ -895,7 +897,7 @@ static int ppp_unattached_ioctl(struct n
 		spin_lock_bh(&pn->all_channels_lock);
 		chan = ppp_find_channel(pn, unit);
 		if (chan) {
-			atomic_inc(&chan->file.refcnt);
+			refcount_inc(&chan->file.refcnt);
 			file->private_data = &chan->file;
 			err = 0;
 		}
@@ -960,6 +962,8 @@ static __net_exit void ppp_exit_net(stru

 	mutex_destroy(&pn->all_ppp_mutex);
 	idr_destroy(&pn->units_idr);
+	WARN_ON_ONCE(!list_empty(&pn->all_channels));
+	WARN_ON_ONCE(!list_empty(&pn->new_channels));
 }

 static struct pernet_operations ppp_net_ops = {
@@ -1263,8 +1267,8 @@ ppp_start_xmit(struct sk_buff *skb, stru
 	put_unaligned_be16(proto, pp);

 	skb_scrub_packet(skb, !net_eq(ppp->ppp_net, dev_net(dev)));
-	ppp_xmit_process(ppp, skb);
-
+	skb_queue_tail(&ppp->file.xq, skb);
+	ppp_xmit_process(ppp);
 	return NETDEV_TX_OK;

  outf:
@@ -1349,7 +1353,7 @@ static int ppp_dev_init(struct net_devic
 	 * that ppp_destroy_interface() won't run before the device gets
 	 * unregistered.
 	 */
-	atomic_inc(&ppp->file.refcnt);
+	refcount_inc(&ppp->file.refcnt);

 	return 0;
 }
@@ -1378,7 +1382,7 @@ static void ppp_dev_priv_destructor(stru
 	struct ppp *ppp;

 	ppp = netdev_priv(dev);
-	if (atomic_dec_and_test(&ppp->file.refcnt))
+	if (refcount_dec_and_test(&ppp->file.refcnt))
 		ppp_destroy_interface(ppp);
 }

@@ -1416,14 +1420,13 @@ static void ppp_setup(struct net_device
  */

 /* Called to do any work queued up on the transmit side that can now be done */
-static void __ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
+static void __ppp_xmit_process(struct ppp *ppp)
 {
+	struct sk_buff *skb;
+
 	ppp_xmit_lock(ppp);
 	if (!ppp->closing) {
 		ppp_push(ppp);
-
-		if (skb)
-			skb_queue_tail(&ppp->file.xq, skb);
 		while (!ppp->xmit_pending &&
 		       (skb = skb_dequeue(&ppp->file.xq)))
 			ppp_send_frame(ppp, skb);
@@ -1433,13 +1436,11 @@ static void __ppp_xmit_process(struct pp
 			netif_wake_queue(ppp->dev);
 		else
 			netif_stop_queue(ppp->dev);
-	} else {
-		kfree_skb(skb);
 	}
 	ppp_xmit_unlock(ppp);
 }

-static void ppp_xmit_process(struct ppp *ppp, struct sk_buff *skb)
+static void ppp_xmit_process(struct ppp *ppp)
 {
 	local_bh_disable();

@@ -1447,7 +1448,7 @@ static void ppp_xmit_process(struct ppp
 		goto err;

 	(*this_cpu_ptr(ppp->xmit_recursion))++;
-	__ppp_xmit_process(ppp, skb);
+	__ppp_xmit_process(ppp);
 	(*this_cpu_ptr(ppp->xmit_recursion))--;

 	local_bh_enable();
@@ -1457,8 +1458,6 @@ static void ppp_xmit_process(struct ppp
 err:
 	local_bh_enable();

-	kfree_skb(skb);
-
 	if (net_ratelimit())
 		netdev_err(ppp->dev, "recursion detected\n");
 }
@@ -1943,7 +1942,7 @@ static void __ppp_channel_push(struct ch
 	if (skb_queue_empty(&pch->file.xq)) {
 		ppp = pch->ppp;
 		if (ppp)
-			__ppp_xmit_process(ppp, NULL);
+			__ppp_xmit_process(ppp);
 	}
 }

@@ -2682,7 +2681,7 @@ ppp_unregister_channel(struct ppp_channe

 	pch->file.dead = 1;
 	wake_up_interruptible(&pch->file.rwait);
-	if (atomic_dec_and_test(&pch->file.refcnt))
+	if (refcount_dec_and_test(&pch->file.refcnt))
 		ppp_destroy_channel(pch);
 }

@@ -3052,7 +3051,7 @@ init_ppp_file(struct ppp_file *pf, int k
 	pf->kind = kind;
 	skb_queue_head_init(&pf->xq);
 	skb_queue_head_init(&pf->rq);
-	atomic_set(&pf->refcnt, 1);
+	refcount_set(&pf->refcnt, 1);
 	init_waitqueue_head(&pf->rwait);
 }

@@ -3162,15 +3161,6 @@ ppp_connect_channel(struct channel *pch,
 		goto outl;

 	ppp_lock(ppp);
-	spin_lock_bh(&pch->downl);
-	if (!pch->chan) {
-		/* Don't connect unregistered channels */
-		spin_unlock_bh(&pch->downl);
-		ppp_unlock(ppp);
-		ret = -ENOTCONN;
-		goto outl;
-	}
-	spin_unlock_bh(&pch->downl);
 	if (pch->file.hdrlen > ppp->file.hdrlen)
 		ppp->file.hdrlen = pch->file.hdrlen;
 	hdrlen = pch->file.hdrlen + 2;	/* for protocol bytes */
@@ -3179,7 +3169,7 @@ ppp_connect_channel(struct channel *pch,
 	list_add_tail(&pch->clist, &ppp->channels);
 	++ppp->n_channels;
 	pch->ppp = ppp;
-	atomic_inc(&ppp->file.refcnt);
+	refcount_inc(&ppp->file.refcnt);
 	ppp_unlock(ppp);
 	ret = 0;

@@ -3210,7 +3200,7 @@ ppp_disconnect_channel(struct channel *p
 		if (--ppp->n_channels == 0)
 			wake_up_interruptible(&ppp->file.rwait);
 		ppp_unlock(ppp);
-		if (atomic_dec_and_test(&ppp->file.refcnt))
+		if (refcount_dec_and_test(&ppp->file.refcnt))
 			ppp_destroy_interface(ppp);
 		err = 0;
 	}

> On 9 Dec 2020, at 18:57, Martin Zaharinov <micron10@...il.com> wrote:
> 
> Hi Nault 
> 
> 
> 
>> On 9 Dec 2020, at 18:40, Guillaume Nault <gnault@...hat.com> wrote:
>> 
>> On Wed, Dec 09, 2020 at 04:47:52PM +0200, Martin Zaharinov wrote:
>>> Hi All
>>> 
>>> I have a problem with the latest kernel release,
>>> and it is related to this earlier problem:
>>> 
>>> 
>>> https://www.mail-archive.com/search?l=netdev@vger.kernel.org&q=subject:%22Re%5C%3A+ppp%5C%2Fpppoe%2C+still+panic+4.15.3+in+ppp_push%22&o=newest&f=1
>>> 
>>> I had the same problem with kernel 5.6; now I use kernel 5.9.13 and still have the same problem.
>>> 
>>> 
>>> With kernel 5.9.13 I no longer see any crashes in dmesg, but at some point the accel-ppp service goes defunct and the log contains many lines like these:
>>> 
>>> 
>>> error: vlan608: ioctl(PPPIOCCONNECT): Transport endpoint is not connected
>>> error: vlan617: ioctl(PPPIOCCONNECT): Transport endpoint is not connected
>>> error: vlan679: ioctl(PPPIOCCONNECT): Transport endpoint is not connected
>>> 
>>> At some point the number of connected users doubles or triples; after that the service goes defunct, and I have to wait for all sessions to drop before it can start again.
>>> 
>>> I talked with the accel-ppp team; they said this is a kernel-related problem and suggested going back to kernel 4.14, where the problem does not occur.
>>> 
>>> The problem appeared with kernel 4.15 and later, and there is no solution so far.
>> 
>> I'm sorry, I don't understand.
>> Do you mean that v4.14 worked fine (no crash, no ioctl() error)?
>> Did the problem start appearing in v4.15? Or did v4.15 work and the
>> problem appeared in v4.16?
> 
> I talked with Sergey and Dimka in the Telegram group, and they told me the problem appeared after the changes from 4.14 to 4.15.
> Sergey wrote this: "as I know, there was a similar issue in kernel 4.15 so maybe it is still not fixed"
> 
> I have no way to test with the old 4.14.xxx kernels; I don’t have support for them.
> 
> 
>> 
>>> Please help to find the problem.
>>> 
>>> In the thread linked above, I see that this change was made in ppp_generic.c:
>>> 
>>> ppp_lock(ppp);
>>>       spin_lock_bh(&pch->downl);
>>>       if (!pch->chan) {
>>>               /* Don't connect unregistered channels */
>>>               spin_unlock_bh(&pch->downl);
>>>               ppp_unlock(ppp);
>>>               ret = -ENOTCONN;
>>>               goto outl;
>>>       }
>>>       spin_unlock_bh(&pch->downl);
>>> 
>>> 
>>> But this fix only prevents the error message and the system freeze.
>>> The underlying problem remains, and it is a serious one.
>> 
>> Do you use accel-ppp's unit-cache option? Does the problem go away if
>> you stop using it?
>> 
> 
> No, I don’t use unit-cache. If I enable unit-cache, accel-ppp goes defunct all the same, but users connect and disconnect faster.
> 
> The problem is the same with and without unit-cache.
> Only after this patch do I no longer see errors in dmesg, but this is not a solution.
> In the network there are customers with power-cut problems; when 600 users drop and come back, that is normal, but at that moment the kernel locks up and starts to produce this:
> sessions:
>  starting: 4235
>  active: 3882
>  finishing: 378
> 
> The problem is that the "starting" sessions are not real users; the normal number of users on this server is ~4k customers.
> 
> I use pppd_compat .
> 
> Any idea ?
> 
>>> 
>>> Please help to fix.
> Martin

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ