@@ -189,7 +189,9 @@ class id_parser(object):
# Should we check for more SPDX ids in the same file and
# complain if there are any?
#
- break
+ return self.curline - 1
+
+ return -1
except ParserException as pe:
if pe.tok:
@@ -200,7 +202,7 @@ class id_parser(object):
sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, col, pe.txt))
self.spdx_errors += 1
-def scan_git_tree(tree):
+def scan_git_tree(ln_count, tree):
for el in tree.traverse():
# Exclude stuff which would make pointless noise
# FIXME: Put this somewhere more sensible
@@ -211,12 +213,15 @@ def scan_git_tree(tree):
if not os.path.isfile(el.path):
continue
with open(el.path, 'rb') as fd:
- parser.parse_lines(fd, args.maxlines, el.path)
+ ln = parser.parse_lines(fd, args.maxlines, el.path)
+ if ln >= 0:
+ ln_count[ln] += 1;
+ return ln_count
-def scan_git_subtree(tree, path):
+def scan_git_subtree(ln_count, tree, path):
for p in path.strip('/').split('/'):
tree = tree[p]
- scan_git_tree(tree)
+ scan_git_tree(ln_count, tree)
if __name__ == '__main__':
@@ -225,6 +230,7 @@ if __name__ == '__main__':
ap.add_argument('-m', '--maxlines', type=int, default=15,
help='Maximum number of lines to scan in a file. Default 15')
ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
+ ap.add_argument('-H', '--histogram', action='store_true', help='Verbose histogram about SPDX header position')
args = ap.parse_args()
# Sanity check path arguments
@@ -255,23 +261,31 @@ if __name__ == '__main__':
sys.stderr.write('%s\n' %traceback.format_exc())
sys.exit(1)
+ ln_count= [0] * args.maxlines
+
try:
if len(args.path) and args.path[0] == '-':
stdin = os.fdopen(sys.stdin.fileno(), 'rb')
- parser.parse_lines(stdin, args.maxlines, '-')
+ ln = parser.parse_lines(stdin, args.maxlines, '-')
+ if ln >= 0:
+ ln_count[ln] += 1;
+
else:
if args.path:
for p in args.path:
if os.path.isfile(p):
- parser.parse_lines(open(p, 'rb'), args.maxlines, p)
+ ln = parser.parse_lines(open(p, 'rb'), args.maxlines, p)
+ if ln >= 0:
+ ln_count[ln] += 1;
+
elif os.path.isdir(p):
- scan_git_subtree(repo.head.reference.commit.tree, p)
+ scan_git_subtree(ln_count, repo.head.reference.commit.tree, p)
else:
sys.stderr.write('path %s does not exist\n' %p)
sys.exit(1)
else:
# Full git tree scan
- scan_git_tree(repo.head.commit.tree)
+ scan_git_tree(ln_count, repo.head.commit.tree)
if args.verbose:
sys.stderr.write('\n')
@@ -284,6 +298,11 @@ if __name__ == '__main__':
sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
sys.stderr.write('Files with SPDX: %12d\n' %parser.spdx_valid)
sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)
+ sys.stderr.write('\n')
+ if args.histogram:
+ for i in range(0, len(ln_count)):
+ if ln_count[i] > 0:
+ sys.stderr.write('Files with SPDX at line #%-5d: %12d\n' % (i + 1, ln_count[i]))
sys.exit(0)
According to Documentation/process/license-rules.rst, SPDX headers should appear only in the first lines of a file. However, this script does not enforce that, and several files violate it. It would be useful to be able to show a histogram of the number of files that have the SPDX header at each line number. This feature is optional and is enabled with -H or --histogram. Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org> --- scripts/spdxcheck.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-)