diff mbox series

[RFC,3/5,WIP] xen/scripts: add codestyle.py script

Message ID 20230728081144.4124309-4-luca.fancellu@arm.com (mailing list archive)
State Superseded
Headers show
Series clang-format for Xen | expand

Commit Message

Luca Fancellu July 28, 2023, 8:11 a.m. UTC
This script finds every .c and .h file in the xen hypervisor
codebase, takes the exclusion list from docs/misra, removes the
file excluded from the list and for the remaining files is
calling clang-format on them.

TBD: write it better

Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
---
 xen/scripts/codestyle.py | 261 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 261 insertions(+)
 create mode 100755 xen/scripts/codestyle.py

Comments

Jan Beulich Aug. 9, 2023, 3:41 p.m. UTC | #1
On 28.07.2023 10:11, Luca Fancellu wrote:
> This script finds every .c and .h file in the xen hypervisor
> codebase, takes the exclusion list from docs/misra, removes the
> file excluded from the list and for the remaining files is
> calling clang-format on them.

I was going to ask what the intended purpose is, but looking at the
script itself I think I see the script can also be passed a set of
file names, so it won't necessarily scan the entire code base.
That'll make it more suitable to also be used in an incremental
fashion (whether that's also useful to scan patches isn't really
clear to me, but I guess that'll require yet more work).

Jan
Luca Fancellu Aug. 9, 2023, 3:48 p.m. UTC | #2
> On 9 Aug 2023, at 16:41, Jan Beulich <jbeulich@suse.com> wrote:
> 
> On 28.07.2023 10:11, Luca Fancellu wrote:
>> This script finds every .c and .h file in the xen hypervisor
>> codebase, takes the exclusion list from docs/misra, removes the
>> file excluded from the list and for the remaining files is
>> calling clang-format on them.
> 
> I was going to ask what the intended purpose is, but looking at the
> script itself I think I see the script can also be passed a set of
> file names, so it won't necessarily scan the entire code base.
> That'll make it more suitable to also be used in an incremental
> fashion (whether that's also useful to scan patches isn't really
> clear to me, but I guess that'll require yet more work).

Yes, when we will do some automation, it would be useful to just run the script
against the touched files from a patch.
Indeed running it against the patch file is very difficult, since we are doing some
pre-process and post-process that requires some context that might be missing
from the code inside the patch.

Cheers,
Luca
diff mbox series

Patch

diff --git a/xen/scripts/codestyle.py b/xen/scripts/codestyle.py
new file mode 100755
index 000000000000..5bba004d814b
--- /dev/null
+++ b/xen/scripts/codestyle.py
@@ -0,0 +1,261 @@ 
+#!/usr/bin/env python3
+
+import glob
+import os
+import re
+import sys
+from xen_analysis.settings import xen_dir, repo_dir
+from xen_analysis import utils
+from xen_analysis import exclusion_file_list
+from xen_analysis.exclusion_file_list import ExclusionFileListError
+
+# The Xen codestyle states that labels needs to be indented by at least one
+# blank, but clang-format doesn't have an option for that and if it encounters
+# a label indented by blank characters that are less than its indent
+# configuration, it removes the indentation.
+# So this action is meant as post step and checks every label syntax match and
+# it adds one blank before the label
+def action_fix_label_indent(filename, file_lines):
+    label_rgx = re.compile('^[a-zA-Z_][a-zA-Z0-9_]*\s*:.*$')
+
+    for i in range(0, len(file_lines)):
+        if label_rgx.match(file_lines[i]):
+            file_lines[i] = ' ' + file_lines[i]
+
+    return file_lines
+
+
+# clang-format most of the time breaks the content of asm(...) instructions,
+# so with this function, we protect all the asm sections using the special
+# in code comments that tells clang-format to don't touch the block.
+# asm(...) instruction could be also inside macros, so in that case we protect
+# the entire macro that is enclosing the instruction, in the un-protect stage
+# however, we need to do clang-format's job at least on the tab-space conversion
+# and to put the backslash on the right side.
+def action_protect_asm(filename, file_lines, protect):
+    opening_asm = False
+    cf_off_comment = '/* clang-format off */'
+    cf_on_comment = '/* clang-format on */'
+    asm_stx = '(?:asm|__asm__)(?:\s(?:volatile|__volatile__))?'
+
+    if protect:
+        # Look for closing parenthesis with semicolon ');'
+        closing_asm_rgx_rule = '^.*\);.*$'
+        # Look for opening asm syntax
+        opening_asm_rgx_rule = f'^\s*{asm_stx}\s?\(.*$'
+        macro_start_rgx_rule = r'^\s?#\s?define.*\\$'
+        opening_asm_find = r'(asm|__asm__)'
+        opening_asm_replace = cf_off_comment + r'\1'
+        opening_def_find = r'#\s?define'
+        opening_def_replace = f'{cf_off_comment}#define'
+        closing_asm_find    = r'\);'
+        closing_asm_replace = ');' + cf_on_comment
+        closing_def_find    = '\n'
+        closing_def_replace = cf_on_comment + '\n'
+    else:
+        # Look for closing parenthesis with semicolon ');' and with the
+        # special clang-format comment
+        closing_asm_rgx_rule = f'^.*\);.*{re.escape(cf_on_comment)}.*$'
+        # Look for opening asm syntax preceded by the special clang-format
+        # comment, the comment is optional to generalise the algorithm to
+        # un-protect asm outside and inside macros. The case outside is easy
+        # because we will find '/* clang-format off */asm', instead the case
+        # inside is more tricky and we are going to find only 'asm' and then
+        # go backwards until we find '/* clang-format off */#define'
+        opening_asm_rgx_rule = \
+            f'^\s*({re.escape(cf_off_comment)})?{asm_stx}\s?\(.*$'
+        # Look for the define just before the asm invocation, here we look for
+        # '/* clang-format off */#define' or '#define', this is to handle a rare
+        # corner case where an asm invocation is inside a macro, but was not
+        # protected in the 'protect stage', because it was on the same line
+        # of the define and was not ending with backslash but it was exceeding
+        # line width so clang-format formatted anyway.
+        # It's safe because we won't change code that has no clang-format
+        # comments, but at least the tool won't complain
+        macro_start_rgx_rule = \
+            rf'^\s?(?:{re.escape(cf_off_comment)})?#\s?define.*\\$'
+        opening_asm_find = rf'({re.escape(cf_off_comment)}(asm|__asm__))'
+        opening_asm_replace = r'\2'
+        opening_def_find = rf'(?:{re.escape(cf_off_comment)})?#\s?define'
+        opening_def_replace = '#define'
+        closing_asm_find    = rf'\);.*{re.escape(cf_on_comment)}'
+        closing_asm_replace = ');'
+        closing_def_find    = cf_on_comment + '\n'
+        closing_def_replace = '\n'
+
+    opening_asm_rgx = re.compile(opening_asm_rgx_rule)
+    closing_asm_rgx = re.compile(closing_asm_rgx_rule)
+    macro_start_rgx = re.compile(macro_start_rgx_rule)
+
+    i = 0
+    i_max = len(file_lines)
+    macro_begin = -1
+    while i < i_max:
+        # Keep track of the last define we found
+        if macro_start_rgx.match(file_lines[i]):
+            macro_begin = i
+        # Try to find in the current line the asm syntax opening, but don't
+        # touch asm syntax inside macros that usually have the line ended by
+        # a backslash, this is because insert stuff in them will have issues
+        # when later clang-format will format the macro itself
+        if (not opening_asm) and opening_asm_rgx.match(file_lines[i]):
+            line_before = (i > 0) and file_lines[i-1].endswith('\\\n')
+            if file_lines[i].endswith('\\\n') or line_before:
+                # This should never happen, but if it does, it means an
+                # unexpected syntax is found in the file
+                if macro_begin < 0:
+                    raise Exception("Begin of macro not found in {}\n"
+                                    "The asm invocation is on line {}"
+                                    .format(filename, i))
+
+                # asm invocation inside macro, need to protect the entire macro,
+                # macro_begin should point to this define
+                file_lines[macro_begin] = re.sub(opening_def_find,
+                                                 opening_def_replace,
+                                                 file_lines[macro_begin])
+
+                # now go to the end of the macro, it's easy as the first line
+                # without a backslash will be the end
+                for j in range(i, len(file_lines)):
+                    if not file_lines[j].endswith('\\\n'):
+                        file_lines[j] = \
+                            file_lines[j].replace(closing_def_find,
+                                                  closing_def_replace, 1)
+                        # Advance i index, as j is the last line checked
+                        i = j
+                        break
+
+                # In the un-protect stage, we need to do clang-format's job,
+                # so convert tabs to 4 spaces and for macros put the backslash
+                # at the end of the line
+                if not protect:
+                    for j in range(macro_begin, i+1):
+                        # Tab replacement
+                        file_lines[j] = file_lines[j].replace('\t', '    ')
+                        # backslash indentation
+                        line_len = len(file_lines[j])
+                        # 81 counting also the newline character
+                        if file_lines[j].endswith('\\\n') and line_len < 81:
+                            spaces = ' ' * (81 - line_len)
+                            file_lines[j] = \
+                                file_lines[j].replace('\\\n', spaces + '\\\n',
+                                                      1)
+
+                macro_begin = -1
+            else:
+                # asm invocation is not inside a macro, protect the asm syntax
+                opening_asm = True
+                file_lines[i] = re.sub(opening_asm_find, opening_asm_replace,
+                                       file_lines[i])
+
+        # Try to find in the current line the asm closing syntax, only if a
+        # previous line already found an opening asm syntax
+        if opening_asm and closing_asm_rgx.match(file_lines[i]):
+            opening_asm = False
+            file_lines[i] = re.sub(closing_asm_find, closing_asm_replace,
+                                   file_lines[i])
+
+        i += 1
+
+    # This should never happen, but if it does, it means an unexpected syntax
+    # is found in the file
+    if opening_asm:
+        raise Exception("Unbalanced asm parenthesis in {}".format(filename))
+
+    return file_lines
+
+
+# This function runs a set of actions on the passed file name, the actions needs
+# to have a specific interface: action(filename, file_lines), filename will be
+# the name of the file where the script is working, file_lines will be the
+# content of the file expressed as an array of lines in string format.
+# The action function must return this array back to the caller and every
+# possible modifications done on that
+def stage(filename, actions):
+
+    if len(actions) == 0:
+        return
+
+    try:
+        with open(filename, "rt") as infile:
+            file_lines = infile.readlines()
+    except OSError as e:
+        raise Exception("Issue with reading file {}: {}".format(filename, e))
+
+    for task in actions:
+        file_lines = task(filename, file_lines)
+
+    try:
+        with open(filename, "wt") as outfile:
+            outfile.writelines(file_lines)
+    except OSError as e:
+        raise Exception("Issue writing file {}: {}".format(filename, e))
+
+
+def main(argv):
+    # Setup actions for pre-stage and post-stage
+    pre_stage_asm_protect = \
+        lambda file, file_lines: action_protect_asm(file, file_lines, True)
+    post_stage_asm_unprotect = \
+        lambda file, file_lines: action_protect_asm(file, file_lines, False)
+    pre_stage_actions = [pre_stage_asm_protect]
+    post_stage_actions = [post_stage_asm_unprotect, action_fix_label_indent]
+
+    len_args = len(argv)
+    if len_args > 0:
+        c_files = []
+        for i in range(0, len_args):
+            if not argv[i].startswith('/'):
+                # Assume a relative path to Xen
+                abs_path = os.getcwd() + "/" + argv[i]
+                check_path = [abs_path]
+                if '*' in abs_path:
+                    check_path = glob.glob(abs_path)
+                for path in check_path:
+                    if os.path.exists(path):
+                        c_files.append(path)
+                    else:
+                        raise Exception("Malformed path {} solved from {}"
+                                        .format(path, argv[i]))
+    else:
+        # Find all files with .c and .h extension
+        c_files = utils.recursive_find_file(xen_dir, r'.*\.(?:c|h)$')
+
+    try:
+        exclusion_file = \
+                "{}/docs/misra/exclude-list.json".format(repo_dir)
+        exclusion_list = \
+                exclusion_file_list.load_exclusion_file_list(exclusion_file)
+    except ExclusionFileListError as e:
+        print("ERROR: Issue with reading file {}: {}".format(exclusion_file, e))
+        sys.exit(1)
+
+    # Transform the lists of absolute path in the second element of the
+    # exclusion_list into a plain list of absolute path to be exluded
+    excluded_c_files = [j for i in exclusion_list for j in i[1]]
+
+    file_to_format = []
+    for file in c_files:
+        add_to_list = True
+        for excl in excluded_c_files:
+            if excl in file:
+                add_to_list = False
+                break
+        if add_to_list:
+            file_to_format.append(file)
+        elif len_args > 0:
+            print("ERROR: file {} is excluded!".format(file))
+            sys.exit(1)
+
+    for file in file_to_format:
+        try:
+            stage(file, pre_stage_actions)
+            utils.invoke_command("clang-format -i {}".format(file), False,
+                                 Exception, "Error occured invoking: {}\n")
+            stage(file, post_stage_actions)
+        except Exception as e:
+            print("ERROR: {}\n".format(e))
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])