You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
615 lines
21 KiB
615 lines
21 KiB
#!/usr/bin/env python3 |
|
|
|
# Copyright (c) 2019 Nordic Semiconductor ASA |
|
# SPDX-License-Identifier: Apache-2.0 |
|
|
|
""" |
|
Lists maintainers for files or commits. Similar in function to |
|
scripts/get_maintainer.pl from Linux, but geared towards GitHub. The mapping is |
|
in MAINTAINERS.yml. |
|
|
|
The comment at the top of MAINTAINERS.yml in Zephyr documents the file format. |
|
|
|
See the help texts for the various subcommands for more information. They can |
|
be viewed with e.g. |
|
|
|
./get_maintainer.py path --help |
|
|
|
This executable doubles as a Python library. Identifiers not prefixed with '_' |
|
are part of the library API. The library documentation can be viewed with this |
|
command: |
|
|
|
$ pydoc get_maintainer |
|
""" |
|
|
|
import argparse |
|
import operator |
|
import os |
|
import pathlib |
|
import re |
|
import shlex |
|
import subprocess |
|
import sys |
|
|
|
from yaml import load, YAMLError |
|
try: |
|
# Use the speedier C LibYAML parser if available |
|
from yaml import CSafeLoader as SafeLoader |
|
except ImportError: |
|
from yaml import SafeLoader |
|
|
|
|
|
def _main():
    # Entry point when run as an executable: parses the command line, loads
    # the maintainers file and hands over to the selected subcommand

    cli_args = _parse_args()
    handler = cli_args.cmd_fn

    try:
        # Loading the maintainers file stays inside the 'try' so that
        # maintainers-file and Git failures during loading are reported the
        # same way as failures inside the subcommand itself
        maintainers = Maintainers(cli_args.maintainers)
        handler(maintainers, cli_args)
    except (MaintainersError, GitError) as err:
        _serr(err)
|
|
|
|
|
def _parse_args():
    # Parses arguments when run as an executable.
    #
    # Returns the argparse namespace. Its 'cmd_fn' attribute is the
    # Maintainers method that implements the selected subcommand; it is
    # called as cmd_fn(maintainers_instance, args). Exits with the usage text
    # if no subcommand was given.

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__, allow_abbrev=False)

    parser.add_argument(
        "-m", "--maintainers",
        metavar="MAINTAINERS_FILE",
        help="Maintainers file to load. If not specified, MAINTAINERS.yml in "
             "the top-level repository directory is used, and must exist. "
             "Paths in the maintainers file will always be taken as relative "
             "to the top-level directory.")

    subparsers = parser.add_subparsers(
        help="Available commands (each has a separate --help text)")

    path_parser = subparsers.add_parser(
        "path",
        help="List area(s) for paths")
    path_parser.add_argument(
        "paths",
        metavar="PATH",
        nargs="*",
        help="Path to list areas for")
    path_parser.set_defaults(cmd_fn=Maintainers._path_cmd)

    commits_parser = subparsers.add_parser(
        "commits",
        help="List area(s) for commit range")
    commits_parser.add_argument(
        "commits",
        metavar="COMMIT_RANGE",
        nargs="*",
        help="Commit range to list areas for (default: HEAD~..)")
    commits_parser.set_defaults(cmd_fn=Maintainers._commits_cmd)

    list_parser = subparsers.add_parser(
        "list",
        help="List files in areas")
    list_parser.add_argument(
        "area",
        metavar="AREA",
        nargs="?",
        # Non-orphaned files are the ones that DO appear in some area; the
        # previous text described orphaned files instead
        help="Name of area to list files in. If not specified, all "
             "non-orphaned files are listed (all files that appear in at "
             "least one area).")
    list_parser.set_defaults(cmd_fn=Maintainers._list_cmd)

    areas_parser = subparsers.add_parser(
        "areas",
        help="List areas and maintainers")
    areas_parser.add_argument(
        "maintainer",
        metavar="MAINTAINER",
        nargs="?",
        help="List all areas maintained by maintainer.")
    areas_parser.set_defaults(cmd_fn=Maintainers._areas_cmd)

    orphaned_parser = subparsers.add_parser(
        "orphaned",
        help="List orphaned files (files that do not appear in any area)")
    orphaned_parser.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        help="Limit to files under PATH")
    orphaned_parser.set_defaults(cmd_fn=Maintainers._orphaned_cmd)

    count_parser = subparsers.add_parser(
        "count",
        help="Count areas, unique maintainers, and / or unique collaborators")
    # The flags are registered in this order so that the --help output keeps
    # its existing layout
    for short_flag, long_flag, flag_help in (
            ("-a", "--count-areas",
             "Count the number of areas"),
            ("-c", "--count-collaborators",
             "Count the number of unique collaborators"),
            ("-n", "--count-maintainers",
             "Count the number of unique maintainers"),
            ("-o", "--count-unmaintained",
             "Count the number of unmaintained areas")):
        count_parser.add_argument(
            short_flag,
            long_flag,
            action="store_true",
            help=flag_help)
    count_parser.set_defaults(cmd_fn=Maintainers._count_cmd)

    args = parser.parse_args()
    if not hasattr(args, "cmd_fn"):
        # Called without a subcommand
        sys.exit(parser.format_usage().rstrip())

    return args
|
|
|
|
|
class Maintainers:
    """
    Represents the contents of a maintainers YAML file.

    These attributes are available:

    areas:
      A dictionary that maps area names to Area instances, for all areas
      defined in the maintainers file

    filename:
      The path to the maintainers file
    """
    def __init__(self, filename=None):
        """
        Creates a Maintainers instance.

        filename (default: None):
          Path to the maintainers file to parse. If None, or if the given
          path does not exist, MAINTAINERS.yml in the top-level directory of
          the Git repository is used, and must exist.

        Paths are later resolved relative to the directory that holds the
        maintainers file (or the Git top-level directory in the fallback
        case).
        """
        if (filename is not None) and (pathlib.Path(filename).exists()):
            self.filename = pathlib.Path(filename)
            self._toplevel = self.filename.parent
        else:
            self._toplevel = pathlib.Path(_git("rev-parse", "--show-toplevel"))
            self.filename = self._toplevel / "MAINTAINERS.yml"

        self.areas = {}
        for area_name, area_dict in _load_maintainers(self.filename).items():
            area = Area()
            area.name = area_name
            area.status = area_dict.get("status")
            area.maintainers = area_dict.get("maintainers", [])
            area.collaborators = area_dict.get("collaborators", [])
            area.inform = area_dict.get("inform", [])
            area.labels = area_dict.get("labels", [])
            area.tests = area_dict.get("tests", [])
            area.tags = area_dict.get("tags", [])
            area.description = area_dict.get("description")

            # area._match_fn(path) tests if the path matches files and/or
            # files-regex
            area._match_fn = \
                _get_match_fn(area_dict.get("files"),
                              area_dict.get("files-regex"))

            # Like area._match_fn(path), but for files-exclude and
            # files-regex-exclude
            area._exclude_match_fn = \
                _get_match_fn(area_dict.get("files-exclude"),
                              area_dict.get("files-regex-exclude"))

            self.areas[area_name] = area

    def path2areas(self, path):
        """
        Returns a list of Area instances for the areas that contain 'path',
        taken as relative to the current directory
        """
        # Make directory paths end in '/' so that foo/bar matches foo/bar/.
        # Skip this check in _contains() itself, because the isdir() makes it
        # twice as slow in cases where it's not needed.
        is_dir = os.path.isdir(path)

        # Make 'path' relative to the repository root and normalize it.
        # normpath() would remove a trailing '/', so we add it afterwards.
        path = os.path.normpath(os.path.join(
            os.path.relpath(os.getcwd(), self._toplevel),
            path))

        if is_dir:
            path += "/"

        return [area for area in self.areas.values()
                if area._contains(path)]

    def commits2areas(self, commits):
        """
        Returns a set() of Area instances for the areas that contain files that
        are modified by the commit range in 'commits'. 'commits' could be e.g.
        "HEAD~..", to inspect the tip commit
        """
        res = set()
        # Final '--' is to make sure 'commits' is interpreted as a commit range
        # rather than a path. That might give better error messages.
        for path in _git("diff", "--name-only", commits, "--").splitlines():
            res.update(self.path2areas(path))
        return res

    def __repr__(self):
        return "<Maintainers for '{}'>".format(self.filename)

    #
    # Command-line subcommands
    #

    def _path_cmd(self, args):
        # 'path' subcommand implementation. Prints the areas that contain
        # the paths in args.paths, followed by the paths that are in no area.

        # Validate every path up front so that nothing is printed if any of
        # them is missing
        for path in args.paths:
            if not os.path.exists(path):
                _serr("'{}': no such file or directory".format(path))

        res = set()
        orphaned = []
        for path in args.paths:
            areas = self.path2areas(path)
            res.update(areas)
            if not areas:
                orphaned.append(path)

        _print_areas(res)
        if orphaned:
            if res:
                print()
            print("Orphaned paths (not in any area):\n" + "\n".join(orphaned))

    def _commits_cmd(self, args):
        # 'commits' subcommand implementation. Prints the union of the areas
        # touched by each commit range in args.commits.

        commits = args.commits or ("HEAD~..",)
        _print_areas({area for commit_range in commits
                      for area in self.commits2areas(commit_range)})

    def _areas_cmd(self, args):
        # 'areas' subcommand implementation. Prints one line per area with
        # its maintainers. If args.maintainer is given, only the areas that
        # list that maintainer are printed.

        for area in self.areas.values():
            if args.maintainer and args.maintainer not in area.maintainers:
                continue
            print("{:25}\t{}".format(area.name, ",".join(area.maintainers)))

    def _count_cmd(self, args):
        # 'count' subcommand implementation

        if not (args.count_areas or args.count_collaborators or
                args.count_maintainers or args.count_unmaintained):
            # if no specific count is provided, print them all
            args.count_areas = True
            args.count_collaborators = True
            args.count_maintainers = True
            args.count_unmaintained = True

        unmaintained = 0
        collaborators = set()
        maintainers = set()

        for area in self.areas.values():
            if area.status == 'maintained':
                maintainers.update(area.maintainers)
            elif area.status in ('odd fixes', 'unmaintained'):
                # Areas whose status is literally 'unmaintained' used to be
                # left out of this count. 'odd fixes' is still counted, as
                # before.
                unmaintained += 1
            collaborators.update(area.collaborators)

        if args.count_areas:
            print('{:14}\t{}'.format('areas:', len(self.areas)))
        if args.count_maintainers:
            print('{:14}\t{}'.format('maintainers:', len(maintainers)))
        if args.count_collaborators:
            print('{:14}\t{}'.format('collaborators:', len(collaborators)))
        if args.count_unmaintained:
            print('{:14}\t{}'.format('unmaintained:', unmaintained))

    def _list_cmd(self, args):
        # 'list' subcommand implementation

        if args.area is None:
            # List all files that appear in some area
            for path in _ls_files():
                if any(area._contains(path) for area in self.areas.values()):
                    print(path)
        else:
            # List all files that appear in the given area
            area = self.areas.get(args.area)
            if area is None:
                _serr("'{}': no such area defined in '{}'"
                      .format(args.area, self.filename))

            for path in _ls_files():
                if area._contains(path):
                    print(path)

    def _orphaned_cmd(self, args):
        # 'orphaned' subcommand implementation. Prints the files (optionally
        # limited to those under args.path) that no area contains.

        if args.path is not None and not os.path.exists(args.path):
            _serr("'{}': no such file or directory".format(args.path))

        for path in _ls_files(args.path):
            if not any(area._contains(path) for area in self.areas.values()):
                print(path)
|
|
|
|
|
class Area:
    """
    Represents an entry for an area in MAINTAINERS.yml.

    These attributes are available:

    name:
      The name of the area, as a string (the key of the entry in the
      maintainers file)

    status:
      The status of the area, as a string. None if the area has no 'status'
      key. See MAINTAINERS.yml.

    maintainers:
      List of maintainers. Empty if the area has no 'maintainers' key.

    collaborators:
      List of collaborators. Empty if the area has no 'collaborators' key.

    inform:
      List of people to inform on pull requests. Empty if the area has no
      'inform' key.

    labels:
      List of GitHub labels for the area. Empty if the area has no 'labels'
      key.

    tests:
      List of strings from the 'tests' key. Empty if the area has no 'tests'
      key.

    tags:
      List of strings from the 'tags' key. Empty if the area has no 'tags'
      key.

    description:
      Text from 'description' key, or None if the area has no 'description'
      key

    The attributes are not set by this class itself. Maintainers.__init__()
    assigns them, along with the private _match_fn and _exclude_match_fn
    matchers used by _contains().
    """
    def _contains(self, path):
        # Returns True if the area contains 'path', and False otherwise.
        #
        # A path is contained if it matches the area's files/files-regex
        # patterns and does not match its files-exclude/files-regex-exclude
        # patterns. Either matcher may be None, meaning "matches nothing".
        # The result is always a real bool, never None or a match object.

        if not self._match_fn or not self._match_fn(path):
            return False

        return not (self._exclude_match_fn and self._exclude_match_fn(path))

    def __repr__(self):
        return "<Area {}>".format(self.name)
|
|
|
|
|
def _print_areas(areas): |
|
first = True |
|
for area in sorted(areas, key=operator.attrgetter("name")): |
|
if not first: |
|
print() |
|
first = False |
|
|
|
print("""\ |
|
{} |
|
\tstatus: {} |
|
\tmaintainers: {} |
|
\tcollaborators: {} |
|
\tinform: {} |
|
\tlabels: {} |
|
\ttests: {} |
|
\ttags: {} |
|
\tdescription: {}""".format(area.name, |
|
area.status, |
|
", ".join(area.maintainers), |
|
", ".join(area.collaborators), |
|
", ".join(area.inform), |
|
", ".join(area.labels), |
|
", ".join(area.tests), |
|
", ".join(area.tags), |
|
area.description or "")) |
|
|
|
|
|
def _get_match_fn(globs, regexes): |
|
# Constructs a single regex that tests for matches against the globs in |
|
# 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR). |
|
# Returns the search() method of the compiled regex. |
|
# |
|
# Returns None if there are neither globs nor regexes, which should be |
|
# interpreted as no match. |
|
|
|
if not (globs or regexes): |
|
return None |
|
|
|
regex = "" |
|
|
|
if globs: |
|
glob_regexes = [] |
|
for glob in globs: |
|
# Construct a regex equivalent to the glob |
|
glob_regex = glob.replace(".", "\\.").replace("*", "[^/]*") \ |
|
.replace("?", "[^/]") |
|
|
|
if not glob.endswith("/"): |
|
# Require a full match for globs that don't end in / |
|
glob_regex += "$" |
|
|
|
glob_regexes.append(glob_regex) |
|
|
|
# The glob regexes must anchor to the beginning of the path, since we |
|
# return search(). (?:) is a non-capturing group. |
|
regex += "^(?:{})".format("|".join(glob_regexes)) |
|
|
|
if regexes: |
|
if regex: |
|
regex += "|" |
|
regex += "|".join(regexes) |
|
|
|
return re.compile(regex).search |
|
|
|
|
|
def _load_maintainers(path):
    # Parses the maintainers file at 'path' and runs _check_maintainers() on
    # the result before returning it. The returned data consists of plain
    # Python dicts/lists/etc., mirroring the structure of the file.
    #
    # YAML syntax errors are re-raised as MaintainersError, with the path
    # prepended to the message.

    with open(path, encoding="utf-8") as maints_file:
        try:
            contents = load(maints_file, Loader=SafeLoader)
        except YAMLError as yaml_err:
            message = "{}: YAML error: {}".format(path, yaml_err)
            raise MaintainersError(message)

    _check_maintainers(path, contents)
    return contents
|
|
|
|
|
def _check_maintainers(maints_path, yaml): |
|
# Checks the maintainers data in 'yaml', which comes from the maintainers |
|
# file at maints_path, which is a pathlib.Path instance |
|
|
|
root = maints_path.parent |
|
|
|
def ferr(msg): |
|
_err("{}: {}".format(maints_path, msg)) # Prepend the filename |
|
|
|
if not isinstance(yaml, dict): |
|
ferr("empty or malformed YAML (not a dict)") |
|
|
|
ok_keys = {"status", "maintainers", "collaborators", "inform", "files", |
|
"files-exclude", "files-regex", "files-regex-exclude", |
|
"labels", "description", "tests", "tags"} |
|
|
|
ok_status = {"maintained", "odd fixes", "unmaintained", "obsolete"} |
|
ok_status_s = ", ".join('"' + s + '"' for s in ok_status) # For messages |
|
|
|
for area_name, area_dict in yaml.items(): |
|
if not isinstance(area_dict, dict): |
|
ferr("malformed entry for area '{}' (not a dict)" |
|
.format(area_name)) |
|
|
|
for key in area_dict: |
|
if key not in ok_keys: |
|
ferr("unknown key '{}' in area '{}'" |
|
.format(key, area_name)) |
|
|
|
if "status" in area_dict and \ |
|
area_dict["status"] not in ok_status: |
|
ferr("bad 'status' key on area '{}', should be one of {}" |
|
.format(area_name, ok_status_s)) |
|
|
|
if not area_dict.keys() & {"files", "files-regex"}: |
|
ferr("either 'files' or 'files-regex' (or both) must be specified " |
|
"for area '{}'".format(area_name)) |
|
|
|
if not area_dict.get("maintainers") and area_dict.get("status") == "maintained": |
|
ferr("maintained area '{}' with no maintainers".format(area_name)) |
|
|
|
for list_name in "maintainers", "collaborators", "inform", "files", \ |
|
"files-regex", "labels", "tags", "tests": |
|
if list_name in area_dict: |
|
lst = area_dict[list_name] |
|
if not (isinstance(lst, list) and |
|
all(isinstance(elm, str) for elm in lst)): |
|
ferr("malformed '{}' value for area '{}' -- should " |
|
"be a list of strings".format(list_name, area_name)) |
|
|
|
for files_key in "files", "files-exclude": |
|
if files_key in area_dict: |
|
for glob_pattern in area_dict[files_key]: |
|
# This could be changed if it turns out to be too slow, |
|
# e.g. to only check non-globbing filenames. The tuple() is |
|
# needed due to pathlib's glob() returning a generator. |
|
paths = tuple(root.glob(glob_pattern)) |
|
if not paths: |
|
ferr("glob pattern '{}' in '{}' in area '{}' does not " |
|
"match any files".format(glob_pattern, files_key, |
|
area_name)) |
|
if not glob_pattern.endswith("/"): |
|
if all(path.is_dir() for path in paths): |
|
ferr("glob pattern '{}' in '{}' in area '{}' " |
|
"matches only directories, but has no " |
|
"trailing '/'" |
|
.format(glob_pattern, files_key, |
|
area_name)) |
|
|
|
for files_regex_key in "files-regex", "files-regex-exclude": |
|
if files_regex_key in area_dict: |
|
for regex in area_dict[files_regex_key]: |
|
try: |
|
re.compile(regex) |
|
except re.error as e: |
|
ferr("bad regular expression '{}' in '{}' in " |
|
"'{}': {}".format(regex, files_regex_key, |
|
area_name, e.msg)) |
|
|
|
if "description" in area_dict and \ |
|
not isinstance(area_dict["description"], str): |
|
ferr("malformed 'description' value for area '{}' -- should be a " |
|
"string".format(area_name)) |
|
|
|
|
|
def _git(*args):
    # Helper for running a Git command. Returns the rstrip()ed stdout output,
    # decoded as UTF-8. Called like _git("diff").
    #
    # Errors are reported through _giterr(): when the git executable can't be
    # found or started, and when the command exits with a non-zero status.

    command = ("git",) + args
    command_s = " ".join(shlex.quote(word) for word in command)  # For errors

    try:
        completed = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        _giterr("git executable not found (when running '{}'). Check that "
                "it's in listed in the PATH environment variable"
                .format(command_s))
    except OSError as e:
        _giterr("error running '{}': {}".format(command_s, e))

    out_text = completed.stdout.decode("utf-8")
    if completed.returncode:
        err_text = completed.stderr.decode("utf-8")
        _giterr("error running '{}'\n\nstdout:\n{}\nstderr:\n{}".format(
            command_s, out_text, err_text))

    return out_text.rstrip()
|
|
|
|
|
def _ls_files(path=None):
    # Returns the output of 'git ls-files' as a list of lines. If 'path' is
    # not None, it is passed to 'git ls-files' as an argument.

    extra_args = [] if path is None else [path]
    listing = _git("ls-files", *extra_args)
    return listing.splitlines()
|
|
|
|
|
def _err(msg):
    # Reports a maintainers-file problem by raising MaintainersError with
    # 'msg' as the message
    error = MaintainersError(msg)
    raise error
|
|
|
|
|
def _giterr(msg):
    # Reports a Git problem by raising GitError with 'msg' as the message
    error = GitError(msg)
    raise error
|
|
|
|
|
def _serr(msg): |
|
# For reporting errors when get_maintainer.py is run as a script. |
|
# sys.exit() shouldn't be used otherwise. |
|
sys.exit("{}: error: {}".format(sys.argv[0], msg)) |
|
|
|
|
|
class MaintainersError(Exception):
    """Exception raised for MAINTAINERS.yml-related errors"""
|
|
|
|
|
class GitError(Exception):
    """Exception raised for Git-related errors"""
|
|
|
|
|
# Only run the command-line interface when executed as a script, so that the
# module can also be imported as a library without side effects
if __name__ == "__main__":
    _main()
|
|
|