diff mbox series

[v2,5/7] Add git-contributors script to notify about merges

Message ID 20250122-update-release-v2-5-d01529db3aa5@kernel.org (mailing list archive)
State New
Headers show
Series Update release.sh | expand

Commit Message

Andrey Albershteyn Jan. 22, 2025, 3:01 p.m. UTC
Add a Python script that collects the email addresses of everyone
tagged in the changes merged for the next release.

CC: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Andrey Albershteyn <aalbersh@kernel.org>
---
 tools/git-contributors.py | 94 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

Comments

Darrick J. Wong Jan. 28, 2025, 5:43 p.m. UTC | #1
On Wed, Jan 22, 2025 at 04:01:31PM +0100, Andrey Albershteyn wrote:
> Add python script used to collect emails over all changes merged in
> the next release.
> 
> CC: "Darrick J. Wong" <djwong@kernel.org>
> Signed-off-by: Andrey Albershteyn <aalbersh@kernel.org>

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>

--D

> ---
>  tools/git-contributors.py | 94 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 94 insertions(+)
> 
> diff --git a/tools/git-contributors.py b/tools/git-contributors.py
> new file mode 100755
> index 0000000000000000000000000000000000000000..83bbe8ce0ee1dcbd591c6d3016d553fac2a7d286
> --- /dev/null
> +++ b/tools/git-contributors.py
> @@ -0,0 +1,94 @@
> +#!/usr/bin/python3
> +
> +# List all contributors to a series of git commits.
> +# Copyright(C) 2025 Oracle, All Rights Reserved.
> +# Licensed under GPL 2.0 or later
> +
> +import re
> +import subprocess
> +import io
> +import sys
> +import argparse
> +import email.utils
> +
> +DEBUG = False
> +
> +def backtick(args):
> +    '''Generator function that yields lines of a program's stdout.'''
> +    if DEBUG:
> +        print(' '.join(args))
> +    p = subprocess.Popen(args, stdout = subprocess.PIPE)
> +    for line in io.TextIOWrapper(p.stdout, encoding="utf-8"):
> +        yield line
> +
> +class find_developers(object):
> +    def __init__(self):
> +        tags = '%s|%s|%s|%s|%s|%s|%s|%s' % (
> +            'signed-off-by',
> +            'acked-by',
> +            'cc',
> +            'reviewed-by',
> +            'reported-by',
> +            'tested-by',
> +            'suggested-by',
> +            'reported-and-tested-by')
> +        # some tag, a colon, a space, and everything after that
> +        regex1 = r'^(%s):\s+(.+)$' % tags
> +
> +        self.r1 = re.compile(regex1, re.I)
> +
> +    def run(self, lines):
> +        addr_list = []
> +
> +        for line in lines:
> +            l = line.strip()
> +
> +            # emailutils can handle abominations like:
> +            #
> +            # Reviewed-by: Bogus J. Simpson <bogus@simpson.com>
> +            # Reviewed-by: "Bogus J. Simpson" <bogus@simpson.com>
> +            # Reviewed-by: bogus@simpson.com
> +            # Cc: <stable@vger.kernel.org> # v6.9
> +            # Tested-by: Moo Cow <foo@bar.com> # powerpc
> +            m = self.r1.match(l)
> +            if not m:
> +                continue
> +            (name, addr) = email.utils.parseaddr(m.expand(r'\g<2>'))
> +
> +            # This last split removes anything after a hash mark,
> +            # because someone could have provided an improperly
> +            # formatted email address:
> +            #
> +            # Cc: stable@vger.kernel.org # v6.19+
> +            #
> +            # emailutils doesn't seem to catch this, and I can't
> +            # fully tell from RFC2822 that this isn't allowed.  I
> +            # think it is because dtext doesn't forbid spaces or
> +            # hash marks.
> +            addr_list.append(addr.split('#')[0])
> +
> +        return sorted(set(addr_list))
> +
> +def main():
> +    parser = argparse.ArgumentParser(description = "List email addresses of contributors to a series of git commits.")
> +    parser.add_argument("revspec", nargs = '?', default = None, \
> +            help = "git revisions to process.")
> +    parser.add_argument("--delimiter", type = str, default = '\n', \
> +            help = "Separate each email address with this string.")
> +    args = parser.parse_args()
> +
> +    fd = find_developers()
> +    if args.revspec:
> +        # read git commits from repo
> +        contributors = fd.run(backtick(['git', 'log', '--pretty=medium',
> +                  args.revspec]))
> +    else:
> +        # read patch from stdin
> +        contributors = fd.run(sys.stdin.readlines())
> +
> +    print(args.delimiter.join(sorted(contributors)))
> +    return 0
> +
> +if __name__ == '__main__':
> +    sys.exit(main())
> +
> 
> -- 
> 2.47.0
> 
>
diff mbox series

Patch

diff --git a/tools/git-contributors.py b/tools/git-contributors.py
new file mode 100755
index 0000000000000000000000000000000000000000..83bbe8ce0ee1dcbd591c6d3016d553fac2a7d286
--- /dev/null
+++ b/tools/git-contributors.py
@@ -0,0 +1,94 @@ 
+#!/usr/bin/python3
+
+# List all contributors to a series of git commits.
+# Copyright(C) 2025 Oracle, All Rights Reserved.
+# Licensed under GPL 2.0 or later
+
+import re
+import subprocess
+import io
+import sys
+import argparse
+import email.utils
+
+DEBUG = False  # when True, backtick() prints each command line before running it
+
+def backtick(args):
+    '''Yield the decoded stdout lines of command args; the child is reaped on exit.'''
+    if DEBUG:
+        print(' '.join(args))
+    with subprocess.Popen(args, stdout = subprocess.PIPE) as p:
+        for line in io.TextIOWrapper(p.stdout, encoding="utf-8"):
+            yield line
+
+class find_developers(object):
+    '''Collect the email addresses named in commit trailer tags.'''
+
+    def __init__(self):
+        tags = '|'.join((
+            'signed-off-by',
+            'acked-by',
+            'cc',
+            'reviewed-by',
+            'reported-by',
+            'tested-by',
+            'suggested-by',
+            'reported-and-tested-by'))
+        # some tag, a colon, whitespace, and everything after that
+        self.r1 = re.compile(r'^(%s):\s+(.+)$' % tags, re.I)
+
+    def run(self, lines):
+        '''Return the sorted, unique addresses found in tagged lines.'''
+        addr_list = []
+
+        for line in lines:
+            # email.utils can handle abominations like:
+            # Reviewed-by: Bogus J. Simpson <bogus@simpson.com>
+            # Reviewed-by: "Bogus J. Simpson" <bogus@simpson.com>
+            # Reviewed-by: bogus@simpson.com
+            m = self.r1.match(line.strip())
+            if not m:
+                continue
+            value = m.group(2)
+            addr = email.utils.parseaddr(value)[1]
+
+            # A trailing "# note" is not part of the address:
+            #
+            # Cc: <stable@vger.kernel.org> # v6.9
+            # Tested-by: Moo Cow <foo@bar.com> # powerpc
+            # Cc: stable@vger.kernel.org # v6.19+
+            #
+            # parseaddr() returns '' when it rejects such a line, so
+            # retry without the note; when it instead folds the note
+            # into the address, cut it off at the hash mark.
+            if not addr:
+                addr = email.utils.parseaddr(value.split('#')[0])[1]
+            addr = addr.split('#')[0].strip()
+            if addr:
+                addr_list.append(addr)
+
+        return sorted(set(addr_list))
+
+def main():
+    '''Print the contributor addresses for a revspec, or for a patch read from stdin.'''
+    parser = argparse.ArgumentParser(
+        description = "List email addresses of contributors to a series of git commits.")
+    parser.add_argument("revspec", nargs = '?', default = None,
+            help = "git revisions to process.")
+    parser.add_argument("--delimiter", type = str, default = '\n',
+            help = "Separate each email address with this string.")
+    args = parser.parse_args()
+
+    if args.revspec:
+        # read git commits from repo
+        source = backtick(['git', 'log', '--pretty=medium', args.revspec])
+    else:
+        # read patch from stdin
+        source = sys.stdin.readlines()
+
+    print(args.delimiter.join(sorted(find_developers().run(source))))
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())  # main()'s return value becomes the process exit status
+