From f5492666a3b62344de9026a960c11888160362c9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 21 Sep 2009 17:04:13 -0700 Subject: scripts/get_maintainer.pl: add --git-blame Julia Lawall suggested that get_maintainers.pl should have the ability to include signatories of commits that are modified by a particular patch. Vegard Nossum did something similar once. http://lkml.org/lkml/2008/5/29/449 The modified script looks the commits for all lines in the patch, and includes the "-by:" signatories for those commits. It uses the same git-min-percent, git-max-maintainers, and git-min-signatures options. git-since is ignored. It can be used independently from the --git default, so ./scripts/get_maintainers.pl --nogit --git-blame or ./scripts/get_maintainers.pl --nogit --git-blame -f is acceptable. If used with -f , all lines/commits for the file are checked. --git-blame can be slow if used with -f --git-blame does not work with -f Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 138 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 109 insertions(+), 29 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 278a45bd45a..35781e0d43e 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -13,7 +13,7 @@ use strict; my $P = $0; -my $V = '0.17'; +my $V = '0.18beta2'; use Getopt::Long qw(:config no_auto_abbrev); @@ -29,6 +29,7 @@ my $email_git_min_signatures = 1; my $email_git_max_maintainers = 5; my $email_git_min_percent = 5; my $email_git_since = "1-year-ago"; +my $email_git_blame = 0; my $output_multiline = 1; my $output_separator = ", "; my $scm = 0; @@ -68,6 +69,7 @@ if (!GetOptions( 'git-max-maintainers=i' => \$email_git_max_maintainers, 'git-min-percent=i' => \$email_git_min_percent, 'git-since=s' => \$email_git_since, + 'git-blame!' => \$email_git_blame, 'm!' => \$email_maintainer, 'n!' 
=> \$email_usename, 'l!' => \$email_list, @@ -107,8 +109,9 @@ if ($selections == 0) { die "$P: Missing required option: email, scm, status, subsystem or web\n"; } -if ($email && ($email_maintainer + $email_list + $email_subscriber_list - + $email_git + $email_git_penguin_chiefs) == 0) { +if ($email && + ($email_maintainer + $email_list + $email_subscriber_list + + $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) { usage(); die "$P: Please select at least 1 email option\n"; } @@ -150,6 +153,7 @@ close(MAINT); ## use the filenames on the command line or find the filenames in the patchfiles my @files = (); +my @range = (); foreach my $file (@ARGV) { ##if $file is a directory and it lacks a trailing slash, add one @@ -162,13 +166,19 @@ foreach my $file (@ARGV) { push(@files, $file); } else { my $file_cnt = @files; + my $lastfile; open(PATCH, "<$file") or die "$P: Can't open ${file}\n"; while () { if (m/^\+\+\+\s+(\S+)/) { my $filename = $1; $filename =~ s@^[^/]*/@@; $filename =~ s@\n@@; + $lastfile = $filename; push(@files, $filename); + } elsif (m/^\@\@ -(\d+),(\d+)/) { + if ($email_git_blame) { + push(@range, "$lastfile:$1:$2"); + } } } close(PATCH); @@ -226,6 +236,9 @@ foreach my $file (@files) { recent_git_signoffs($file); } + if ($email && $email_git_blame) { + git_assign_blame($file); + } } if ($email) { @@ -311,6 +324,7 @@ MAINTAINER field selection options: --git-max-maintainers => maximum maintainers to add (default: 5) --git-min-percent => minimum percentage of commits required (default: 5) --git-since => git history to use (default: 1-year-ago) + --git-blame => use git blame to find modified commits for patch or file --m => include maintainer(s) if any --n => include name 'Full Name ' --l => include list(s) if any @@ -333,13 +347,15 @@ Other options: Notes: Using "-f directory" may give unexpected results: - - Used with "--git", git signators for _all_ files in and below - directory are examined as git recurses directories. 
- Any specified X: (exclude) pattern matches are _not_ ignored. - Used with "--nogit", directory is used as a pattern match, - no individual file within the directory or subdirectory - is matched. + Used with "--git", git signators for _all_ files in and below + directory are examined as git recurses directories. + Any specified X: (exclude) pattern matches are _not_ ignored. + Used with "--nogit", directory is used as a pattern match, + no individual file within the directory or subdirectory + is matched. + Used with "--git-blame", does not iterate all files in directory + Using "--git-blame" is slow and may add old committers and authors + that are no longer active maintainers to the output. EOT } @@ -449,14 +465,19 @@ sub push_email_address { my ($email_address) = @_; my $email_name = ""; - if ($email_address =~ m/([^<]+)<(.*\@.*)>$/) { - $email_name = $1; - $email_address = $2; - } if ($email_maintainer) { - if ($email_usename && $email_name) { - push(@email_to, format_email($email_name, $email_address)); + if ($email_address =~ m/([^<]+)<(.*\@.*)>$/) { + $email_name = $1; + $email_address = $2; + if ($email_usename) { + push(@email_to, format_email($email_name, $email_address)); + } else { + push(@email_to, $email_address); + } + } elsif ($email_address =~ m/<(.+)>/) { + $email_address = $1; + push(@email_to, $email_address); } else { push(@email_to, $email_address); } @@ -545,20 +566,79 @@ sub recent_git_signoffs { last; } } - if ($line =~ m/(.+)<(.+)>/) { - my $git_name = $1; - my $git_addr = $2; - if ($email_usename) { - push(@email_to, format_email($git_name, $git_addr)); - } else { - push(@email_to, $git_addr); - } - } elsif ($line =~ m/<(.+)>/) { - my $git_addr = $1; - push(@email_to, $git_addr); - } else { - push(@email_to, $line); + push_email_address($line); + } +} + +sub save_commits { + my ($cmd, @commits) = @_; + my $output; + my @lines = (); + + $output = `${cmd}`; + + @lines = split("\n", $output); + foreach my $line (@lines) { + if ($line =~ 
m/^(\w+) /) { + push (@commits, $1); + } + } + return @commits; +} + +sub git_assign_blame { + my ($file) = @_; + + my @lines = (); + my @commits = (); + my $cmd; + my $output; + my %hash; + my $total_sign_offs; + my $count; + + if (@range) { + foreach my $file_range_diff (@range) { + next if (!($file_range_diff =~ m/(.+):(.+):(.+)/)); + my $diff_file = $1; + my $diff_start = $2; + my $diff_length = $3; + next if (!("$file" eq "$diff_file")); + $cmd = "git blame -l -L $diff_start,+$diff_length $file\n"; + @commits = save_commits($cmd, @commits); } + } else { + if (-f $file) { + $cmd = "git blame -l $file\n"; + @commits = save_commits($cmd, @commits); + } + } + + $total_sign_offs = 0; + @commits = uniq(@commits); + foreach my $commit (@commits) { + $cmd = "git log -1 ${commit}"; + $cmd .= " | grep -Ei \"^[-_ a-z]+by:.*\\\@.*\$\""; + if (!$email_git_penguin_chiefs) { + $cmd .= " | grep -Ev \"${penguin_chiefs}\""; + } + $cmd .= " | cut -f2- -d\":\""; + + $output = `${cmd}`; + $output =~ s/^\s*//gm; + @lines = split("\n", $output); + $hash{$_}++ for @lines; + $total_sign_offs += @lines; + } + + $count = 0; + foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { + my $sign_offs = $hash{$line}; + $count++; + last if ($sign_offs < $email_git_min_signatures || + $count > $email_git_max_maintainers || + $sign_offs * 100 / $total_sign_offs < $email_git_min_percent); + push_email_address($line); } } -- cgit v1.2.3-70-g09d2 From 1d606b4e0bf8fe45e3f88543dfce83207ae0027d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 21 Sep 2009 17:04:14 -0700 Subject: scripts/get_maintainer.pl: add sections in pattern match depth order Before this change, matched sections were added in the order of appearance in the normally alphabetic section order of the MAINTAINERS file. For instance, finding the maintainer for drivers/scsi/wd7000.c would first find "SCSI SUBSYSTEM", then "WD7000 SCSI SUBSYSTEM", then "THE REST". 
before patch: $ ./scripts/get_maintainer.pl --nogit -f drivers/scsi/wd7000.c James E.J. Bottomley Miroslav Zagorac linux-scsi@vger.kernel.org linux-kernel@vger.kernel.org get_maintainer.pl now selects matched sections by longest pattern match. Longest is the number of "/"s and any specific file pattern. This changes the example output order of MAINTAINERS to whatever is selected in "WD7000 SUBSYSTEM", then "SCSI SYSTEM", then "THE REST". after patch: $ ./scripts/get_maintainer.pl --nogit -f drivers/scsi/wd7000.c Miroslav Zagorac James E.J. Bottomley linux-scsi@vger.kernel.org linux-kernel@vger.kernel.org Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 35781e0d43e..fb446e0f8bb 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -211,6 +211,7 @@ foreach my $file (@files) { if ($type eq 'X') { if (file_match_pattern($file, $value)) { $exclude = 1; + last; } } } @@ -218,18 +219,24 @@ foreach my $file (@files) { if (!$exclude) { my $tvi = 0; + my %hash; foreach my $line (@typevalue) { if ($line =~ m/^(\C):\s*(.*)/) { my $type = $1; my $value = $2; if ($type eq 'F') { if (file_match_pattern($file, $value)) { - add_categories($tvi); + my $pattern_depth = ($value =~ tr@/@@); + $pattern_depth++ if (!(substr($value,-1,1) eq "/")); + $hash{$tvi} = $pattern_depth; } } } $tvi++; } + foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { + add_categories($line); + } } if ($email && $email_git) { -- cgit v1.2.3-70-g09d2 From 3fb55652b9f754990e286723f209ce3c07c96d69 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 21 Sep 2009 17:04:17 -0700 Subject: scripts/get_maintainer.pl: add --pattern-depth --pattern-depth is used to control how many levels of directory traversal should be performed to find maintainers. 
default is 0 (all directory levels). For instance: MAINTAINERS currently has multiple M: and F: entries that match net/netfilter/ipvs/ip_vs_app.c IPVS M: Wensong Zhang M: Simon Horman M: Julian Anastasov [...] F: net/netfilter/ipvs/ NETFILTER/IPTABLES/IPCHAINS [...] M: Patrick McHardy [...] F: net/netfilter/ NETWORKING [GENERAL] M: "David S. Miller" [...] F: net/ THE REST M: Linus Torvalds [...] F: */ Using this command will return all of those maintainers: (except Linus unless --git-chief-maintainers is specified) $ ./scripts/get_maintainer.pl --nogit -nol \ -f net/netfilter/ipvs/ip_vs_app.c Julian Anastasov Simon Horman Wensong Zhang Patrick McHardy David S. Miller Adding --pattern-depth=1 will match at the deepest level $ ./scripts/get_maintainer.pl --nogit -nol --pattern-depth=1 \ -f net/netfilter/ipvs/ip_vs_app.c Julian Anastasov Simon Horman Wensong Zhang Adding --pattern-depth=2 will match at the deepest level and 1 higher $ ./scripts/get_maintainer.pl --nogit -nol --pattern-depth=2 \ -f net/netfilter/ipvs/ip_vs_app.c Julian Anastasov Simon Horman Wensong Zhang Patrick McHardy and so on. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index fb446e0f8bb..5132949500c 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -13,7 +13,7 @@ use strict; my $P = $0; -my $V = '0.18beta2'; +my $V = '0.19'; use Getopt::Long qw(:config no_auto_abbrev); @@ -37,6 +37,7 @@ my $web = 0; my $subsystem = 0; my $status = 0; my $from_filename = 0; +my $pattern_depth = 0; my $version = 0; my $help = 0; @@ -80,6 +81,7 @@ if (!GetOptions( 'status!' => \$status, 'scm!' => \$scm, 'web!' 
=> \$web, + 'pattern-depth=i' => \$pattern_depth, 'f|file' => \$from_filename, 'v|version' => \$version, 'h|help' => \$help, @@ -226,9 +228,13 @@ foreach my $file (@files) { my $value = $2; if ($type eq 'F') { if (file_match_pattern($file, $value)) { - my $pattern_depth = ($value =~ tr@/@@); - $pattern_depth++ if (!(substr($value,-1,1) eq "/")); - $hash{$tvi} = $pattern_depth; + my $value_pd = ($value =~ tr@/@@); + my $file_pd = ($file =~ tr@/@@); + $value_pd++ if (substr($value,-1,1) ne "/"); + if ($pattern_depth == 0 || + (($file_pd - $value_pd) < $pattern_depth)) { + $hash{$tvi} = $value_pd; + } } } } @@ -345,13 +351,14 @@ Output type options: --separator [, ] => separator for multiple entries on 1 line --multiline => print 1 entry per line -Default options: - [--email --git --m --n --l --multiline] - Other options: + --pattern-depth => Number of pattern directory traversals (default: 0 (all)) --version => show version --help => show this help information +Default options: + [--email --git --m --n --l --multiline --pattern-depth=0] + Notes: Using "-f directory" may give unexpected results: Used with "--git", git signators for _all_ files in and below -- cgit v1.2.3-70-g09d2 From 0e70e83dfd40cac47e1fc3e2f1c7b893ea0cd2f8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 21 Sep 2009 17:04:20 -0700 Subject: scripts/get_maintainer.pl: better email routines, use perl not shell where possible Added format_email and parse_email routines to reduce inline use. Added email_address_inuse to eliminate multiple maintainer entries for the same email address, the first name encountered is used. 
Used internal perl equivalents of shell cmd use of grep|cut|sort|uniq Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 152 +++++++++++++++++++++++++++++----------------- 1 file changed, 96 insertions(+), 56 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 5132949500c..1200d724e73 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -13,7 +13,7 @@ use strict; my $P = $0; -my $V = '0.19'; +my $V = '0.20'; use Getopt::Long qw(:config no_auto_abbrev); @@ -258,11 +258,8 @@ if ($email) { foreach my $chief (@penguin_chief) { if ($chief =~ m/^(.*):(.*)/) { my $email_address; - if ($email_usename) { - $email_address = format_email($1, $2); - } else { - $email_address = $2; - } + + $email_address = format_email($1, $2); if ($email_git_penguin_chiefs) { push(@email_to, $email_address); } else { @@ -400,21 +397,57 @@ sub top_of_kernel_tree { return 0; } -sub format_email { - my ($name, $email) = @_; +sub parse_email { + my ($formatted_email) = @_; + + my $name = ""; + my $address = ""; + + if ($formatted_email =~ /^([^<]+)<(.*\@.*)>$/) { + $name = $1; + $address = $2; + } elsif ($formatted_email =~ /^<(.*\@.*)>$/) { + $address = $1; + } elsif ($formatted_email =~ /^(.*\@.*)$/) { + $address = $1; + } $name =~ s/^\s+|\s+$//g; $name =~ s/^\"|\"$//g; - $email =~ s/^\s+|\s+$//g; + $address =~ s/^\s+|\s+$//g; - my $formatted_email = ""; + if ($name =~ /[^a-z0-9 \.\-]/i) { ##has "must quote" chars + $name =~ s/(?"; + $name = "\"$name\""; + } + + if ($email_usename) { + if ("$name" eq "") { + $formatted_email = "$address"; + } else { + $formatted_email = "$name <${address}>"; + } } else { - $formatted_email = "${name} \<${email}\>"; + $formatted_email = $address; } + return $formatted_email; } @@ -444,19 +477,18 @@ sub add_categories { } } } elsif ($ptype eq "M") { - my $p_used = 0; - if ($index >= 0) { - my $tv = 
$typevalue[$index - 1]; - if ($tv =~ m/^(\C):\s*(.*)/) { - if ($1 eq "P") { - if ($email_usename) { - push_email_address(format_email($2, $pvalue)); - $p_used = 1; + my ($name, $address) = parse_email($pvalue); + if ($name eq "") { + if ($index >= 0) { + my $tv = $typevalue[$index - 1]; + if ($tv =~ m/^(\C):\s*(.*)/) { + if ($1 eq "P") { + $name = $2; } } } } - if (!$p_used) { + if ($email_maintainer) { push_email_addresses($pvalue); } } elsif ($ptype eq "T") { @@ -475,26 +507,24 @@ sub add_categories { } } +sub email_address_inuse { + my ($test_address) = @_; + + foreach my $line (@email_to) { + my ($name, $address) = parse_email($line); + + return 1 if ($address eq $test_address); + } + return 0; +} + sub push_email_address { - my ($email_address) = @_; + my ($line) = @_; - my $email_name = ""; + my ($name, $address) = parse_email($line); - if ($email_maintainer) { - if ($email_address =~ m/([^<]+)<(.*\@.*)>$/) { - $email_name = $1; - $email_address = $2; - if ($email_usename) { - push(@email_to, format_email($email_name, $email_address)); - } else { - push(@email_to, $email_address); - } - } elsif ($email_address =~ m/<(.+)>/) { - $email_address = $1; - push(@email_to, $email_address); - } else { - push(@email_to, $email_address); - } + if (!email_address_inuse($address)) { + push(@email_to, format_email($name, $address)); } } @@ -535,6 +565,7 @@ sub recent_git_signoffs { my $output = ""; my $count = 0; my @lines = (); + my %hash; my $total_sign_offs; if (which("git") eq "") { @@ -548,25 +579,31 @@ sub recent_git_signoffs { } $cmd = "git log --since=${email_git_since} -- ${file}"; - $cmd .= " | grep -Ei \"^[-_ a-z]+by:.*\\\@.*\$\""; - if (!$email_git_penguin_chiefs) { - $cmd .= " | grep -Ev \"${penguin_chiefs}\""; - } - $cmd .= " | cut -f2- -d\":\""; - $cmd .= " | sort | uniq -c | sort -rn"; $output = `${cmd}`; $output =~ s/^\s*//gm; @lines = split("\n", $output); - $total_sign_offs = 0; + @lines = grep(/^[-_ a-z]+by:.*\@.*$/i, @lines); + if 
(!$email_git_penguin_chiefs) { + @lines = grep(!/${penguin_chiefs}/i, @lines); + } + # cut -f2- -d":" + s/.*:\s*(.+)\s*/$1/ for (@lines); + + @lines = mailmap(@lines); + + $total_sign_offs = @lines; + @lines = sort(@lines); + # uniq -c foreach my $line (@lines) { - if ($line =~ m/([0-9]+)\s+(.*)/) { - $total_sign_offs += $1; - } else { - die("$P: Unexpected git output: ${line}\n"); - } + $hash{$line}++; + } + # sort -rn + @lines = (); + foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { + push(@lines,"$hash{$line} $line"); } foreach my $line (@lines) { @@ -579,8 +616,8 @@ sub recent_git_signoffs { $sign_offs * 100 / $total_sign_offs < $email_git_min_percent) { last; } + push_email_address($line); } - push_email_address($line); } } @@ -632,15 +669,18 @@ sub git_assign_blame { @commits = uniq(@commits); foreach my $commit (@commits) { $cmd = "git log -1 ${commit}"; - $cmd .= " | grep -Ei \"^[-_ a-z]+by:.*\\\@.*\$\""; - if (!$email_git_penguin_chiefs) { - $cmd .= " | grep -Ev \"${penguin_chiefs}\""; - } - $cmd .= " | cut -f2- -d\":\""; $output = `${cmd}`; $output =~ s/^\s*//gm; @lines = split("\n", $output); + + @lines = grep(/^[-_ a-z]+by:.*\@.*$/i, @lines); + if (!$email_git_penguin_chiefs) { + @lines = grep(!/${penguin_chiefs}/i, @lines); + } + # cut -f2- -d":" + s/.*:\s*(.+)\s*/$1/ for (@lines); + $hash{$_}++ for @lines; $total_sign_offs += @lines; } -- cgit v1.2.3-70-g09d2 From 8cbb3a77e1a91073fb279a495a11d5093461dfe5 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 21 Sep 2009 17:04:21 -0700 Subject: scripts/get_maintainer.pl: add .mailmap use, shell and email cleanups Add reading and using .mailmap file if it exists Convert address entries in .mailmap to first encountered address Don't terminate shell commands with \n Strip characters found after sign-offs by: name
[stripped] Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 72 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 7 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 1200d724e73..8b80b5abb86 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -152,6 +152,36 @@ while () { } close(MAINT); +my %mailmap; + +open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n"; +while () { + my $line = $_; + + next if ($line =~ m/^\s*#/); + next if ($line =~ m/^\s*$/); + + my ($name, $address) = parse_email($line); + $line = format_email($name, $address); + + next if ($line =~ m/^\s*$/); + + if (exists($mailmap{$name})) { + my $obj = $mailmap{$name}; + push(@$obj, $address); + } else { + my @arr = ($address); + $mailmap{$name} = \@arr; + } +} +close(MAILMAP); + +foreach my $name (sort {$mailmap{$a} <=> $mailmap{$b}} keys %mailmap) { + my $obj = $mailmap{$name}; + foreach my $address (@$obj) { + } +} + ## use the filenames on the command line or find the filenames in the patchfiles my @files = (); @@ -403,12 +433,12 @@ sub parse_email { my $name = ""; my $address = ""; - if ($formatted_email =~ /^([^<]+)<(.*\@.*)>$/) { + if ($formatted_email =~ /^([^<]+)<(.*\@.*)>.*$/) { $name = $1; $address = $2; - } elsif ($formatted_email =~ /^<(.*\@.*)>$/) { + } elsif ($formatted_email =~ /^\s*<(.*\@.*)>.*$/) { $address = $1; - } elsif ($formatted_email =~ /^(.*\@.*)$/) { + } elsif ($formatted_email =~ /^\s*(.*\@.*)$/) { $address = $1; } @@ -557,6 +587,29 @@ sub which { return ""; } +sub mailmap { + my @lines = @_; + my %hash; + + foreach my $line (@lines) { + my ($name, $address) = parse_email($line); + if (!exists($hash{$name})) { + $hash{$name} = $address; + } + if (exists($mailmap{$name})) { + my $obj = $mailmap{$name}; + foreach my $map_address (@$obj) { + if (($map_address eq 
$address) && + ($map_address ne $hash{$name})) { + $line = format_email($name, $hash{$name}); + } + } + } + } + + return @lines; +} + sub recent_git_signoffs { my ($file) = @_; @@ -592,9 +645,10 @@ sub recent_git_signoffs { # cut -f2- -d":" s/.*:\s*(.+)\s*/$1/ for (@lines); + $total_sign_offs = @lines; + @lines = mailmap(@lines); - $total_sign_offs = @lines; @lines = sort(@lines); # uniq -c foreach my $line (@lines) { @@ -655,12 +709,12 @@ sub git_assign_blame { my $diff_start = $2; my $diff_length = $3; next if (!("$file" eq "$diff_file")); - $cmd = "git blame -l -L $diff_start,+$diff_length $file\n"; + $cmd = "git blame -l -L $diff_start,+$diff_length $file"; @commits = save_commits($cmd, @commits); } } else { if (-f $file) { - $cmd = "git blame -l $file\n"; + $cmd = "git blame -l $file"; @commits = save_commits($cmd, @commits); } } @@ -678,11 +732,15 @@ sub git_assign_blame { if (!$email_git_penguin_chiefs) { @lines = grep(!/${penguin_chiefs}/i, @lines); } + # cut -f2- -d":" s/.*:\s*(.+)\s*/$1/ for (@lines); - $hash{$_}++ for @lines; $total_sign_offs += @lines; + + @lines = mailmap(@lines); + + $hash{$_}++ for @lines; } $count = 0; -- cgit v1.2.3-70-g09d2 From 42498316132e89ca2835b977a7cfb32a83e97b35 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 21 Sep 2009 17:04:21 -0700 Subject: scripts/get_maintainer.pl: using --separator implies --nomultiline If a person sets a separator, it's only used if --nomultiline is set. Don't make the command line also include --nomultiline in that case. 
Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 8b80b5abb86..446803efe62 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -105,6 +105,10 @@ if ($#ARGV < 0) { die "$P: argument missing: patchfile or -f file please\n"; } +if ($output_separator ne ", ") { + $output_multiline = 0; +} + my $selections = $email + $scm + $status + $subsystem + $web; if ($selections == 0) { usage(); @@ -376,6 +380,7 @@ MAINTAINER field selection options: Output type options: --separator [, ] => separator for multiple entries on 1 line + using --separator also sets --nomultiline if --separator is not [, ] --multiline => print 1 entry per line Other options: -- cgit v1.2.3-70-g09d2 From 11ecf53c97863a0609db3816d82f1d0ddf3d2bc2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 21 Sep 2009 17:04:22 -0700 Subject: scripts/get_maintainer.pl: add --remove-duplicates Allow control over the elimination of duplicate email names and addresses --remove-duplicates will use the first email name or address presented --noremove-duplicates will emit all names and addresses --remove-duplicates is enabled by default For instance: $ ./scripts/get_maintainer.pl -f drivers/char/tty_ioctl.c Greg Kroah-Hartman Alan Cox Mike Frysinger Alexey Dobriyan linux-kernel@vger.kernel.org $ ./scripts/get_maintainer.pl -f --noremove-duplicates drivers/char/tty_ioctl.c Greg Kroah-Hartman Alan Cox Alan Cox Alan Cox Mike Frysinger Alexey Dobriyan linux-kernel@vger.kernel.org Using --remove-duplicates could eliminate multiple maintainers that share the same name but not the same email address. 
Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 108 +++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 53 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 446803efe62..473b6741d55 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -30,6 +30,7 @@ my $email_git_max_maintainers = 5; my $email_git_min_percent = 5; my $email_git_since = "1-year-ago"; my $email_git_blame = 0; +my $email_remove_duplicates = 1; my $output_multiline = 1; my $output_separator = ", "; my $scm = 0; @@ -71,6 +72,7 @@ if (!GetOptions( 'git-min-percent=i' => \$email_git_min_percent, 'git-since=s' => \$email_git_since, 'git-blame!' => \$email_git_blame, + 'remove-duplicates!' => \$email_remove_duplicates, 'm!' => \$email_maintainer, 'n!' => \$email_usename, 'l!' => \$email_list, @@ -158,32 +160,28 @@ close(MAINT); my %mailmap; -open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n"; -while () { - my $line = $_; +if ($email_remove_duplicates) { + open(MAILMAP, "<${lk_path}.mailmap") || warn "$P: Can't open .mailmap\n"; + while () { + my $line = $_; - next if ($line =~ m/^\s*#/); - next if ($line =~ m/^\s*$/); + next if ($line =~ m/^\s*#/); + next if ($line =~ m/^\s*$/); - my ($name, $address) = parse_email($line); - $line = format_email($name, $address); + my ($name, $address) = parse_email($line); + $line = format_email($name, $address); - next if ($line =~ m/^\s*$/); + next if ($line =~ m/^\s*$/); - if (exists($mailmap{$name})) { - my $obj = $mailmap{$name}; - push(@$obj, $address); - } else { - my @arr = ($address); - $mailmap{$name} = \@arr; - } -} -close(MAILMAP); - -foreach my $name (sort {$mailmap{$a} <=> $mailmap{$b}} keys %mailmap) { - my $obj = $mailmap{$name}; - foreach my $address (@$obj) { + if (exists($mailmap{$name})) { + my $obj = $mailmap{$name}; + push(@$obj, 
$address); + } else { + my @arr = ($address); + $mailmap{$name} = \@arr; + } } + close(MAILMAP); } ## use the filenames on the command line or find the filenames in the patchfiles @@ -373,6 +371,7 @@ MAINTAINER field selection options: --n => include name 'Full Name ' --l => include list(s) if any --s => include subscriber only list(s) if any + --remove-duplicates => minimize duplicate email names/addresses --scm => print SCM tree(s) if any --status => print status if any --subsystem => print subsystem name if any @@ -389,7 +388,7 @@ Other options: --help => show this help information Default options: - [--email --git --m --n --l --multiline --pattern-depth=0] + [--email --git --m --n --l --multiline --pattern-depth=0 --remove-duplicates] Notes: Using "-f directory" may give unexpected results: @@ -438,12 +437,12 @@ sub parse_email { my $name = ""; my $address = ""; - if ($formatted_email =~ /^([^<]+)<(.*\@.*)>.*$/) { + if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) { $name = $1; $address = $2; - } elsif ($formatted_email =~ /^\s*<(.*\@.*)>.*$/) { + } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) { $address = $1; - } elsif ($formatted_email =~ /^\s*(.*\@.*)$/) { + } elsif ($formatted_email =~ /^(.+\@\S*)$/) { $address = $1; } @@ -542,14 +541,16 @@ sub add_categories { } } -sub email_address_inuse { - my ($test_address) = @_; +my %email_hash_name; +my %email_hash_address; - foreach my $line (@email_to) { - my ($name, $address) = parse_email($line); +sub email_inuse { + my ($name, $address) = @_; + + return 1 if (($name eq "") && ($address eq "")); + return 1 if (($name ne "") && exists($email_hash_name{$name})); + return 1 if (($address ne "") && exists($email_hash_address{$address})); - return 1 if ($address eq $test_address); - } return 0; } @@ -558,8 +559,12 @@ sub push_email_address { my ($name, $address) = parse_email($line); - if (!email_address_inuse($address)) { + if (!$email_remove_duplicates) { + push(@email_to, format_email($name, $address)); + } 
elsif (!email_inuse($name, $address)) { push(@email_to, format_email($name, $address)); + $email_hash_name{$name}++; + $email_hash_address{$address}++; } } @@ -600,6 +605,9 @@ sub mailmap { my ($name, $address) = parse_email($line); if (!exists($hash{$name})) { $hash{$name} = $address; + } elsif ($address ne $hash{$name}) { + $address = $hash{$name}; + $line = format_email($name, $address); } if (exists($mailmap{$name})) { my $obj = $mailmap{$name}; @@ -652,31 +660,23 @@ sub recent_git_signoffs { $total_sign_offs = @lines; - @lines = mailmap(@lines); + if ($email_remove_duplicates) { + @lines = mailmap(@lines); + } @lines = sort(@lines); + # uniq -c - foreach my $line (@lines) { - $hash{$line}++; - } + $hash{$_}++ for @lines; + # sort -rn - @lines = (); foreach my $line (sort {$hash{$b} <=> $hash{$a}} keys %hash) { - push(@lines,"$hash{$line} $line"); - } - - foreach my $line (@lines) { - if ($line =~ m/([0-9]+)\s+(.*)/) { - my $sign_offs = $1; - $line = $2; - $count++; - if ($sign_offs < $email_git_min_signatures || - $count > $email_git_max_maintainers || - $sign_offs * 100 / $total_sign_offs < $email_git_min_percent) { - last; - } - push_email_address($line); - } + my $sign_offs = $hash{$line}; + $count++; + last if ($sign_offs < $email_git_min_signatures || + $count > $email_git_max_maintainers || + $sign_offs * 100 / $total_sign_offs < $email_git_min_percent); + push_email_address($line); } } @@ -743,7 +743,9 @@ sub git_assign_blame { $total_sign_offs += @lines; - @lines = mailmap(@lines); + if ($email_remove_duplicates) { + @lines = mailmap(@lines); + } $hash{$_}++ for @lines; } -- cgit v1.2.3-70-g09d2 From b781655a6f6d15bdcc96f2cc1d56b0658f9cf0b4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 21 Sep 2009 17:04:24 -0700 Subject: scripts/get_maintainer.pl: add maintainers in order listed in matched section Previous behavior was "bottom-up" in each section from the pattern "F:" entry that matched. 
Now information is entered into the various lists in the "as entered" order for each matched section. This also allows the F: entry to be put anywhere in a section, not just as the last entries in the section. And a couple of improvements: Don't alphabetically sort before outputting the matched scm, status, subsystem and web sections. Ignore content after a single email address so these entries are acceptable M: name <address>
whatever other comment And a fix: Make an M: entry without a name again use the name from an immediately preceding P: line if it exists. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 70 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 16 deletions(-) (limited to 'scripts/get_maintainer.pl') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 473b6741d55..cdb44b63342 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -313,22 +313,22 @@ if ($email || $email_list) { } if ($scm) { - @scm = sort_and_uniq(@scm); + @scm = uniq(@scm); output(@scm); } if ($status) { - @status = sort_and_uniq(@status); + @status = uniq(@status); output(@status); } if ($subsystem) { - @subsystem = sort_and_uniq(@subsystem); + @subsystem = uniq(@subsystem); output(@subsystem); } if ($web) { - @web = sort_and_uniq(@web); + @web = uniq(@web); output(@web); } @@ -442,7 +442,7 @@ sub parse_email { $address = $2; } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) { $address = $1; - } elsif ($formatted_email =~ /^(.+\@\S*)$/) { + } elsif ($formatted_email =~ /^(.+\@\S*).*$/) { $address = $1; } @@ -485,12 +485,46 @@ sub format_email { return $formatted_email; } -sub add_categories { +sub find_starting_index { + + my ($index) = @_; + + while ($index > 0) { + my $tv = $typevalue[$index]; + if (!($tv =~ m/^(\C):\s*(.*)/)) { + last; + } + $index--; + } + + return $index; +} + +sub find_ending_index { my ($index) = @_; - $index = $index - 1; - while ($index >= 0) { + while ($index < @typevalue) { my $tv = $typevalue[$index]; + if (!($tv =~ m/^(\C):\s*(.*)/)) { + last; + } + $index++; + } + + return $index; +} + +sub add_categories { + my ($index) = @_; + + my $i; + my $start = find_starting_index($index); + my $end = find_ending_index($index); + + push(@subsystem, $typevalue[$start]); + + for ($i = $start + 1; $i < $end; $i++) { + my $tv = $typevalue[$i]; if 
($tv =~ m/^(\C):\s*(.*)/) { my $ptype = $1; my $pvalue = $2; @@ -513,11 +547,12 @@ sub add_categories { } elsif ($ptype eq "M") { my ($name, $address) = parse_email($pvalue); if ($name eq "") { - if ($index >= 0) { - my $tv = $typevalue[$index - 1]; + if ($i > 0) { + my $tv = $typevalue[$i - 1]; if ($tv =~ m/^(\C):\s*(.*)/) { if ($1 eq "P") { $name = $2; + $pvalue = format_email($name, $address); } } } @@ -532,11 +567,6 @@ sub add_categories { } elsif ($ptype eq "S") { push(@status, $pvalue); } - - $index--; - } else { - push(@subsystem,$tv); - $index = -1; } } } @@ -559,6 +589,10 @@ sub push_email_address { my ($name, $address) = parse_email($line); + if ($address eq "") { + return 0; + } + if (!$email_remove_duplicates) { push(@email_to, format_email($name, $address)); } elsif (!email_inuse($name, $address)) { @@ -566,6 +600,8 @@ sub push_email_address { $email_hash_name{$name}++; $email_hash_address{$address}++; } + + return 1; } sub push_email_addresses { @@ -581,7 +617,9 @@ sub push_email_addresses { push_email_address($entry); } } else { - warn("Invalid MAINTAINERS address: '" . $address . "'\n"); + if (!push_email_address($address)) { + warn("Invalid MAINTAINERS address: '" . $address . "'\n"); + } } } -- cgit v1.2.3-70-g09d2