[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <680cf896280c4_193a06294a6@willemb.c.googlers.com.notmuch>
Date: Sat, 26 Apr 2025 11:15:34 -0400
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: Jakub Kicinski <kuba@...nel.org>,
davem@...emloft.net
Cc: netdev@...r.kernel.org,
edumazet@...gle.com,
pabeni@...hat.com,
andrew+netdev@...n.ch,
horms@...nel.org,
Jakub Kicinski <kuba@...nel.org>,
petrm@...dia.com,
willemb@...gle.com,
sdf@...ichev.me,
linux-kselftest@...r.kernel.org
Subject: Re: [PATCH net-next] selftests: net: exit cleanly on SIGTERM /
timeout
Jakub Kicinski wrote:
> ksft runner sends 2 SIGTERMs in a row if a test runs out of time.
> Handle this similarly to how we handle SIGINT - clean up and stop
> running further tests.
>
> Because we get 2 signals we need a bit of logic to ignore
> the subsequent one; they come immediately one after the other
> (due to commit 9616cb34b08e ("kselftest/runner.sh: Propagate SIGTERM
> to runner child")).
>
> This change makes sure we run cleanup (scheduled defer()s)
> and also print a stack trace on SIGTERM, which doesn't happen
> by default. Tests occasionally hang in NIPA and it's impossible
> to tell what they are waiting for or doing.
>
> Signed-off-by: Jakub Kicinski <kuba@...nel.org>
> ---
> CC: petrm@...dia.com
> CC: willemb@...gle.com
> CC: sdf@...ichev.me
> CC: linux-kselftest@...r.kernel.org
> ---
> tools/testing/selftests/net/lib/py/ksft.py | 27 +++++++++++++++++++++-
> 1 file changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
> index 3cfad0fd4570..73710634d457 100644
> --- a/tools/testing/selftests/net/lib/py/ksft.py
> +++ b/tools/testing/selftests/net/lib/py/ksft.py
> @@ -3,6 +3,7 @@
> import builtins
> import functools
> import inspect
> +import signal
> import sys
> import time
> import traceback
> @@ -26,6 +27,10 @@ KSFT_DISRUPTIVE = True
> pass
>
>
> +class KsftTerminate(KeyboardInterrupt):
> + pass
> +
> +
> def ksft_pr(*objs, **kwargs):
> print("#", *objs, **kwargs)
>
> @@ -193,6 +198,19 @@ KSFT_DISRUPTIVE = True
> return env
>
>
> +term_cnt = 0
> +
It is a bit ugly to initialize this here. Also, it is already
initialized below, in ksft_run().
> +def _ksft_intr(signum, frame):
> + # ksft runner.sh sends 2 SIGTERMs in a row on a timeout
> + # if we don't ignore the second one it will stop us from handling cleanup
> + global term_cnt
> + term_cnt += 1
> + if term_cnt == 1:
> + raise KsftTerminate()
> + else:
> + ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
> +
> +
> def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
> cases = cases or []
>
> @@ -205,6 +223,10 @@ KSFT_DISRUPTIVE = True
> cases.append(value)
> break
>
> + global term_cnt
> + term_cnt = 0
> + prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
> +
> totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
>
> print("TAP version 13")
> @@ -229,11 +251,12 @@ KSFT_DISRUPTIVE = True
> cnt_key = 'xfail'
> except BaseException as e:
> stop |= isinstance(e, KeyboardInterrupt)
> + stop |= isinstance(e, KsftTerminate)
> tb = traceback.format_exc()
> for line in tb.strip().split('\n'):
> ksft_pr("Exception|", line)
> if stop:
> - ksft_pr("Stopping tests due to KeyboardInterrupt.")
> + ksft_pr(f"Stopping tests due to {type(e).__name__}.")
> KSFT_RESULT = False
> cnt_key = 'fail'
>
> @@ -248,6 +271,8 @@ KSFT_DISRUPTIVE = True
> if stop:
> break
>
> + signal.signal(signal.SIGTERM, prev_sigterm)
> +
Why is prev_sigterm saved and then restored as the SIGTERM handler here?
> print(
> f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
> )
> --
> 2.49.0
>
Powered by blists - more mailing lists