lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CABVgOSkXE-nYnWyJmCdEZ0b1xCBj-p_cpE5YzBCZvW4cdk8LOw@mail.gmail.com>
Date:   Sun, 30 Oct 2022 22:04:49 +0800
From:   David Gow <davidgow@...gle.com>
To:     Daniel Latypov <dlatypov@...gle.com>
Cc:     brendanhiggins@...gle.com, rmoar@...gle.com,
        linux-kernel@...r.kernel.org, kunit-dev@...glegroups.com,
        linux-kselftest@...r.kernel.org, skhan@...uxfoundation.org
Subject: Re: [PATCH 2/2] kunit: tool: print summary of failed tests if a few
 failed out of a lot

On Sat, Oct 29, 2022 at 5:03 AM Daniel Latypov <dlatypov@...gle.com> wrote:
>
> E.g. all the hw_breakpoint tests are failing right now.
> So if I run `kunit.py run --alltests --arch=x86_64`, then I see
> > Testing complete. Ran 408 tests: passed: 392, failed: 9, skipped: 7
>
> Seeing which 9 tests failed out of the hundreds is annoying.
> If my terminal doesn't have scrollback support, I have to resort to
> looking at `.kunit/test.log` for the `not ok` lines.
>
> Teach kunit.py to print a summarized list of failures if the # of tests
> reaches an arbitrary threshold (>=100 tests).
>
> To try and keep the output from being too long/noisy, this new logic
> a) just reports "parent_test failed" if every child test failed
> b) won't print anything if there are >10 failures (also arbitrary).
>
> With this patch, we get an extra line of output showing:
> > Testing complete. Ran 408 tests: passed: 392, failed: 9, skipped: 7
> > Failures: hw_breakpoint
>
> This also works with parameterized tests, e.g. if I add a fake failure
> > Failures: kcsan.test_atomic_builtins_missing_barrier.threads=6
>
> Note: we didn't have enough tests for this to be a problem before.
> But with commit 980ac3ad0512 ("kunit: tool: rename all_test_uml.config,
> use it for --alltests"), --alltests works and thus running >100 tests
> will probably become more common.
>
> Signed-off-by: Daniel Latypov <dlatypov@...gle.com>
> ---

I like it! I do think we'll ultimately want some more options for the
main results display as well (e.g., only display failed tests, limit
the depth of nested results, etc), but this would be useful even then,
as the number of tests displayed could still be large. (And you might
not know what failures you'd be looking for in advance.)

Reviewed-by: David Gow <davidgow@...gle.com>

Cheers,
-- David

>  tools/testing/kunit/kunit_parser.py    | 47 ++++++++++++++++++++++++++
>  tools/testing/kunit/kunit_tool_test.py | 22 ++++++++++++
>  2 files changed, 69 insertions(+)
>
> diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
> index 1ae873e3e341..94dba66feec5 100644
> --- a/tools/testing/kunit/kunit_parser.py
> +++ b/tools/testing/kunit/kunit_parser.py
> @@ -58,6 +58,10 @@ class Test:
>                 self.counts.errors += 1
>                 stdout.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}')
>
> +       def ok_status(self) -> bool:
> +               """Returns true if the status was ok, i.e. passed or skipped."""
> +               return self.status in (TestStatus.SUCCESS, TestStatus.SKIPPED)
> +
>  class TestStatus(Enum):
>         """An enumeration class to represent the status of a test."""
>         SUCCESS = auto()
> @@ -565,6 +569,40 @@ def print_test_footer(test: Test) -> None:
>         stdout.print_with_timestamp(format_test_divider(message,
>                 len(message) - stdout.color_len()))
>
> +
> +
> +def _summarize_failed_tests(test: Test) -> str:
> +       """Tries to summarize all the failing subtests in `test`."""
> +
> +       def failed_names(test: Test, parent_name: str) -> List[str]:
> +               # Note: we use 'main' internally for the top-level test.
> +               if not parent_name or parent_name == 'main':
> +                       full_name = test.name
> +               else:
> +                       full_name = parent_name + '.' + test.name
> +
> +               if not test.subtests:  # this is a leaf node
> +                       return [full_name]
> +
> +               # If all the children failed, just say this subtest failed.
> +               # Don't summarize it down to "the top-level test failed", though.
> +               failed_subtests = [sub for sub in test.subtests if not sub.ok_status()]
> +               if parent_name and len(failed_subtests) ==  len(test.subtests):
> +                       return [full_name]
> +
> +               all_failures = []  # type: List[str]
> +               for t in failed_subtests:
> +                       all_failures.extend(failed_names(t, full_name))
> +               return all_failures
> +
> +       failures = failed_names(test, '')
> +       # If there are too many failures, printing them out will just be noisy.
> +       if len(failures) > 10:  # this is an arbitrary limit
> +               return ''
> +
> +       return 'Failures: ' + ', '.join(failures)
> +
> +
>  def print_summary_line(test: Test) -> None:
>         """
>         Prints summary line of test object. Color of line is dependent on
> @@ -587,6 +625,15 @@ def print_summary_line(test: Test) -> None:
>                 color = stdout.red
>         stdout.print_with_timestamp(color(f'Testing complete. {test.counts}'))
>
> +       # Summarize failures that might have gone off-screen since we had a lot
> +       # of tests (arbitrarily defined as >=100 for now).
> +       if test.ok_status() or test.counts.total() < 100:
> +               return
> +       summarized = _summarize_failed_tests(test)
> +       if not summarized:
> +               return
> +       stdout.print_with_timestamp(color(summarized))
> +
>  # Other methods:
>
>  def bubble_up_test_results(test: Test) -> None:
> diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
> index a6e53945656e..7dcd67003b23 100755
> --- a/tools/testing/kunit/kunit_tool_test.py
> +++ b/tools/testing/kunit/kunit_tool_test.py
> @@ -312,6 +312,28 @@ class KUnitParserTest(unittest.TestCase):
>                                 result.status)
>                         self.assertEqual('kunit-resource-test', result.subtests[0].name)
>
> +       def test_summarize_failures(self):
> +               output = """
> +               KTAP version 1
> +               1..2
> +                       # Subtest: all_failed_suite
> +                       1..2
> +                       not ok 1 - test1
> +                       not ok 2 - test2
> +               not ok 1 - all_failed_suite
> +                       # Subtest: some_failed_suite
> +                       1..2
> +                       ok 1 - test1
> +                       not ok 2 - test2
> +               not ok 1 - some_failed_suite
> +               """
> +               result = kunit_parser.parse_run_tests(output.splitlines())
> +               self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status)
> +
> +               self.assertEqual(kunit_parser._summarize_failed_tests(result),
> +                       'Failures: all_failed_suite, some_failed_suite.test2')
> +
> +
>  def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream:
>         return kunit_parser.LineStream(enumerate(strs, start=1))
>
> --
> 2.38.1.273.g43a17bfeac-goog
>

Download attachment "smime.p7s" of type "application/pkcs7-signature" (4003 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ