lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20171106191242.fbcsf5br2kohtpu3@revenge.us.oracle.com>
Date:   Mon, 6 Nov 2017 13:12:42 -0600
From:   Tom Saeger <tom.saeger@...cle.com>
To:     Joe Perches <joe@...ches.com>
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Tom Saeger <tom.saeger@...cle.com>,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH] get_maintainer: Add a couple more --self-test options

Hi Joe,
    This is good!  I had something similar cooking - specifically for SCM validation.

My SCM attempt caught a few more issues:
    - check git branch if specified
    - check validitiy of "T:" entry, otherwise warn of malformed entry.

Example malformed (current next has two instances):

9740 T:      git://git.infradead.org/nvme.git

Should be:
9740 T:      git git://git.infradead.org/nvme.git


Also - I believe you intended on warning on all bad SCM entries, not just newly discovered ones?
Your change correctly finds a previously $isbad, however the print is enclosed in an else preventing output.

I was going to inline these, but in my haste to understand in incorporate changes I sanitized whitespace
(BTW - I see both tabs and spaces, which is preferred in this file?)

The below git branch special-casing is for these:
567:T:  git git://people.freedesktop.org/~airlied/linux (part of drm maint)
3671:T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)

See bottom for my suggestions.

--Tom



On Mon, Nov 06, 2017 at 09:27:25AM -0800, Joe Perches wrote:
> Check for duplicate section headers and link reachability.
> 
> Miscellanea:
> 
> o Add --self-test=<foo> options (sections, patterns and scm for now)
>   where the default without options is all tests
> o Rename check_maintainers_patterns to self_test
> o Rename self_test_pattern_info to self_test_info
> 
> Signed-off-by: Joe Perches <joe@...ches.com>
> cc: Tom Saeger <tom.saeger@...cle.com>
> ---
>  scripts/get_maintainer.pl | 114 +++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 97 insertions(+), 17 deletions(-)
> 
> diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
> index c68a5d1ba709..748bff0790a8 100755
> --- a/scripts/get_maintainer.pl
> +++ b/scripts/get_maintainer.pl
> @@ -57,7 +57,7 @@ my $sections = 0;
>  my $file_emails = 0;
>  my $from_filename = 0;
>  my $pattern_depth = 0;
> -my $self_test = 0;
> +my $self_test = undef;
>  my $version = 0;
>  my $help = 0;
>  my $find_maintainer_files = 0;
> @@ -221,7 +221,7 @@ if (-f $ignore_file) {
>  
>  if ($#ARGV > 0) {
>      foreach (@ARGV) {
> -        if ($_ eq "-self-test" || $_ eq "--self-test") {
> +        if ($_ =~ /^-{1,2}self-test(?:=|$)/) {
>              die "$P: using --self-test does not allow any other option or argument\n";
>          }
>      }
> @@ -263,7 +263,7 @@ if (!GetOptions(
>  		'fe|file-emails!' => \$file_emails,
>  		'f|file' => \$from_filename,
>  		'find-maintainer-files' => \$find_maintainer_files,
> -		'self-test' => \$self_test,
> +		'self-test:s' => \$self_test,
>  		'v|version' => \$version,
>  		'h|help|usage' => \$help,
>  		)) {
> @@ -280,9 +280,9 @@ if ($version != 0) {
>      exit 0;
>  }
>  
> -if ($self_test) {
> +if (defined $self_test) {
>      read_all_maintainer_files();
> -    check_maintainers_patterns();
> +    self_test();
>      exit 0;
>  }
>  
> @@ -329,7 +329,7 @@ if (!top_of_kernel_tree($lk_path)) {
>  my @typevalue = ();
>  my %keyword_hash;
>  my @mfiles = ();
> -my @self_test_pattern_info = ();
> +my @self_test_info = ();
>  
>  sub read_maintainer_file {
>      my ($file) = @_;
> @@ -339,6 +339,7 @@ sub read_maintainer_file {
>      my $i = 1;
>      while (<$maint>) {
>  	my $line = $_;
> +	chomp $line;
>  
>  	if ($line =~ m/^([A-Z]):\s*(.*)/) {
>  	    my $type = $1;
> @@ -353,17 +354,16 @@ sub read_maintainer_file {
>  		if ((-d $value)) {
>  		    $value =~ s@([^/])$@$1/@;
>  		}
> -		if ($self_test) {
> -			push(@self_test_pattern_info, {file=>$file, line=>$line, linenr=>$i, pat=>$value});
> -		}
>  	    } elsif ($type eq "K") {
>  		$keyword_hash{@...evalue} = $value;
>  	    }
>  	    push(@typevalue, "$type:$value");
>  	} elsif (!(/^\s*$/ || /^\s*\#/)) {
> -	    $line =~ s/\n$//g;
>  	    push(@typevalue, $line);
>  	}
> +	if (defined $self_test) {
> +	    push(@self_test_info, {file=>$file, linenr=>$i, line=>$line});
> +	}
>  	$i++;
>      }
>      close($maint);
> @@ -614,17 +614,97 @@ if ($web) {
>  
>  exit($exit);
>  
> -sub check_maintainers_patterns {
> +sub self_test {
>      my @lsfiles = ();
> +    my @good_links = ();
> +    my @bad_links = ();
> +    my @section_headers = ();
>  
>      @lsfiles = vcs_list_files($lk_path);
>  
> -    for my $x (@self_test_pattern_info) {
> -        if (!grep(m@...->{pat}@, @lsfiles)) {
> -            my $line = $x->{line};
> -            chomp($line);
> -            print("$x->{file}:$x->{linenr}: warning: no matches $line\n");
> -        }
> +    for my $x (@self_test_info) {
> +
> +	## Section header duplication
> +	if (($self_test eq "" || $self_test =~ /\bsections\b/) &&
> +	    $x->{line} =~ /^\S[^:]/) {
> +	    if (grep(m@^\Q$x->{line}\E@, @section_headers)) {
> +		print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n");
> +	    } else {
> +		push(@section_headers, $x->{line});
> +	    }
> +	}
> +	next if ($x->{line} !~ /^([A-Z]):\s*(.*)/);
> +
> +	my $type = $1;
> +	my $value = $2;
> +
> +	## Filename pattern matching
> +	if (($type eq "F" || $type eq "X") &&
> +	    ($self_test eq "" || $self_test =~ /\bpatterns\b/)) {
> +	    $value =~ s@\.@\\\.@g;       ##Convert . to \.
> +	    $value =~ s/\*/\.\*/g;       ##Convert * to .*
> +	    $value =~ s/\?/\./g;         ##Convert ? to .
> +	    ##if pattern is a directory and it lacks a trailing slash, add one
> +	    if ((-d $value)) {
> +		$value =~ s@([^/])$@$1/@;
> +	    }
> +	    if (!grep(m@...alue@, @lsfiles)) {
> +		print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n");
> +	    }
> +
> +	## Link reachability
> +	} elsif (($type eq "W" ||
> +		  $type eq "B" && $value =~ /^https?:/) &&
> +		 ($self_test eq "" || $self_test =~ /\blinks\b/)) {
> +	    next if (grep(m@^\Q$value\E$@, @good_links));
> +	    my $isbad = 0;
> +	    if (grep(m@^\Q$value\E$@, @bad_links)) {
> +	        $isbad = 1;
> +	    } else {
> +		my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`;
> +		if ($? == 0) {
> +		    push(@good_links, $value);
> +		} else {
> +		    push(@bad_links, $value);
> +		    $isbad = 1;
> +		}
> +	    }
> +	    if ($isbad) {
> +	        print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
> +	    }
> +
> +	## SCM reachability
> +	} elsif (($type eq "T" && $value =~ /^(?:git|quilt|hg)\s+\S/) &&
> +		 ($self_test eq "" || $self_test =~ /\bscm\b/)) {
> +	    next if (grep(m@^\Q$value\E$@, @good_links));
> +	    my $isbad = 0;
> +	    if (grep(m@^\Q$value\E$@, @bad_links)) {
> +	        $isbad = 1;
> +	    } else {
> +		if ($value =~ /^git\s+(\S+)/) {
> +		    my $url = $1;
> +		    my $output = `git ls-remote --exit-code -h "$url" > /dev/null 2>&1`;
> +		    if ($? == 0) {
> +			push(@good_links, $value);
> +		    } else {
> +			push(@bad_links, $value);
> +			$isbad = 1;
> +		    }
> +		} elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
> +		    my $url = $1;
> +		    my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
> +		    if ($? == 0) {
> +			push(@good_links, $value);
> +		    } else {
> +			push(@bad_links, $value);
> +			$isbad = 1;
> +		    }
> +		}
> +		if ($isbad) {
> +		    print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
> +		}
> +	    }
>      }
>  }
>  
> -- 
> 2.15.0
> 

Changed SCM portion to this, which picks up a few more warnings...
Checks git branch on remote if specified.
Perhaps a $ismalformed category or some other way to deal with malformed entries?  Or just
move up to first check of SCM?

         ## SCM reachability
         } elsif (($type eq "T") && ($self_test eq "" || $self_test =~ /\bscm\b/)) {
             next if (grep(m@^\Q$value\E$@, @good_links));
             my $isbad = 0;
             if (grep(m@^\Q$value\E$@, @bad_links)) {
                 $isbad = 1;
             } else {
                 if ($value !~ /^(?:git|quilt|hg)\s+\S/) {
                     print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n");
                 } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) {
                     my $url = $1;
                     my $branch = "";
                     $branch = $3 if $3;
                     my $output = `git ls-remote --exit-code -h "$url" $branch> /dev/null 2>&1`;
                     if ($? == 0) {
                         push(@good_links, $value);
                     } else {
                         push(@bad_links, $value);
                         $isbad = 1;
                     }
                 } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) {
                     my $url = $1;
                     my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`;
                     if ($? == 0) {
                         push(@good_links, $value);
                     } else {
                         push(@bad_links, $value);
                         $isbad = 1;
                     }
                 }
             }
             if ($isbad) {
                 print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n");
             }
         }
     }
  }

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ