@@ -7,6 +7,10 @@ from argparse import ArgumentParser
from xen_analysis.diff_tool.cppcheck_report import CppcheckReport
from xen_analysis.diff_tool.debug import Debug
from xen_analysis.diff_tool.report import ReportError
+from xen_analysis.diff_tool.unified_format_parser import \
+ (UnifiedFormatParser, UnifiedFormatParseError)
+from xen_analysis.settings import repo_dir
+from xen_analysis.utils import invoke_command
def log_info(text, end='\n'):
@@ -36,9 +40,32 @@ def main(argv):
"against the baseline.")
parser.add_argument("-v", "--verbose", action='store_true',
help="Print more informations during the run.")
+ parser.add_argument("--patch", type=str,
+ help="The patch file containing the changes to the "
+ "code, from the baseline analysis result to the "
+ "'check report' analysis result.\n"
+ "Do not use with --baseline-rev/--report-rev")
+ parser.add_argument("--baseline-rev", type=str,
+ help="Revision or SHA of the codebase analysed to "
+ "create the baseline report.\n"
+ "Use together with --report-rev")
+ parser.add_argument("--report-rev", type=str,
+ help="Revision or SHA of the codebase analysed to "
+ "create the 'check report'.\n"
+ "Use together with --baseline-rev")
args = parser.parse_args()
+ if args.patch and (args.baseline_rev or args.report_rev):
+ print("ERROR: '--patch' argument can't be used with '--baseline-rev'"
+ " or '--report-rev'.")
+ sys.exit(1)
+
+ if bool(args.baseline_rev) != bool(args.report_rev):
+ print("ERROR: '--baseline-rev' must be used together with "
+ "'--report-rev'.")
+ sys.exit(1)
+
if args.out == "stdout":
file_out = sys.stdout
else:
@@ -63,11 +90,34 @@ def main(argv):
new_rep.parse()
debug.debug_print_parsed_report(new_rep)
log_info(" [OK]")
- except ReportError as e:
+ diff_source = None
+ if args.patch:
+ diff_source = os.path.realpath(args.patch)
+ elif args.baseline_rev:
+ git_diff = invoke_command(
+ "git --git-dir={}/.git diff -C -C {}..{}"
+ .format(repo_dir, args.baseline_rev, args.report_rev),
+ True, "Error occured invoking:\n{}\n\n{}"
+ )
+ diff_source = git_diff.splitlines(keepends=True)
+ if diff_source:
+ log_info("Parsing changes...", "")
+ diffs = UnifiedFormatParser(diff_source)
+ debug.debug_print_parsed_diff(diffs)
+ log_info(" [OK]")
+ except (ReportError, UnifiedFormatParseError) as e:
print("ERROR: {}".format(e))
sys.exit(1)
- output = new_rep - baseline
+ if args.patch or args.baseline_rev:
+ log_info("Patching baseline...", "")
+ baseline_patched = baseline.patch(diffs)
+ debug.debug_print_patched_report(baseline_patched)
+ log_info(" [OK]")
+ output = new_rep - baseline_patched
+ else:
+ output = new_rep - baseline
+
print(output, end="", file=file_out)
if len(output) > 0:
@@ -3,6 +3,7 @@
from __future__ import print_function
import os
from .report import Report
+from .unified_format_parser import UnifiedFormatParser
class Debug:
@@ -38,3 +39,23 @@ class Debug:
if not self.args.debug:
return
self.__debug_print_report(report, ".parsed")
+
+    def debug_print_patched_report(self, report):
+        # type: (Report) -> None
+        """Dump a patched report to its debug file when debugging is enabled.
+
+        Nothing happens unless ``self.args.debug`` is set.
+        """
+        if self.args.debug:
+            # The patched report already has ".patched" in its name, so no
+            # additional suffix is requested.
+            self.__debug_print_report(report, "")
+
+    def debug_print_parsed_diff(self, diff):
+        # type: (UnifiedFormatParser) -> None
+        """Write every parsed change set of *diff* to a debug file.
+
+        The output file name is derived from the diff path with a ".parsed"
+        suffix. Nothing happens unless ``self.args.debug`` is set. A failure
+        to open or write the file is reported on stdout and not propagated.
+        """
+        if not self.args.debug:
+            return
+        diff_filename = diff.get_diff_path()
+        out_pathname = self.__get_debug_out_filename(diff_filename, ".parsed")
+        try:
+            with open(out_pathname, "wt") as outfile:
+                for change_obj in diff.get_change_sets().values():
+                    print(change_obj, end="", file=outfile)
+        except (IOError, OSError) as e:
+            # IOError is named explicitly: on Python 2 open() raises IOError,
+            # which is not an OSError subclass there (it is an alias on 3.x).
+            print("ERROR: Issue opening file {}: {}".format(out_pathname, e))
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import os
+from .unified_format_parser import UnifiedFormatParser, ChangeSet
class ReportError(Exception):
@@ -47,6 +48,92 @@ class Report(object):
self.__entries[entry_path] = [entry]
self.__last_line_order += 1
+    def remove_entries(self, entry_file_path):
+        # type: (str) -> None
+        """Drop every entry recorded for *entry_file_path*.
+
+        Raises KeyError when the report holds no entries for that path.
+        """
+        self.__entries.pop(entry_file_path)
+
+    def remove_entry(self, entry_path, line_id):
+        # type: (str, int) -> None
+        """Remove the entry identified by *line_id* from file *entry_path*.
+
+        Only entries whose ``line_id`` matches are removed. The file key is
+        dropped from the report once no entry is left for it. An unknown
+        *entry_path* or a *line_id* that matches nothing is a no-op.
+        """
+        entries = self.__entries.get(entry_path)
+        if entries is None:
+            return
+        # Compare against the entry's line_id attribute; an item without that
+        # attribute is compared directly, so plain ids keep working too.
+        kept = [entry for entry in entries
+                if getattr(entry, "line_id", entry) != line_id]
+        if kept:
+            # Update in place so references to this list stay valid
+            entries[:] = kept
+        else:
+            del self.__entries[entry_path]
+
+ def patch(self, diff_obj):
+ # type: (UnifiedFormatParser) -> Report
+ """Return a copy of this report adjusted for the changes in diff_obj.
+
+ The copy is an instance of the same class, created with the path
+ "<path without extension>.patched<extension>". All entries of this
+ report are copied into it, then every change set of diff_obj is applied:
+ COPY duplicates the entries of the origin file under the destination
+ file, DELETE drops all the entries of the file, RENAME moves the entries
+ to the destination file key, and each added/removed line shifts the line
+ number of the entries below it by one. Entries sitting on a removed line
+ are removed from the copy.
+ """
+ filename, file_extension = os.path.splitext(self.__path)
+ patched_report = self.__class__(filename + ".patched" + file_extension)
+ # Work lists: files to drop, change sets to rename, and entries to drop
+ remove_files = []
+ rename_files = []
+ remove_entry = []
+ ChangeMode = ChangeSet.ChangeMode
+
+ # Copy entries from this report to the report we are going to patch
+ for entries in self.__entries.values():
+ for entry in entries:
+ patched_report.add_entry(entry.file_path, entry.line_number,
+ entry.text)
+
+ # Patch the output report
+ # NOTE(review): the code below writes into patched_rep_entries to rename
+ # files, so get_report_entries() presumably returns the live dictionary
+ # of the patched report and not a copy -- confirm.
+ patched_rep_entries = patched_report.get_report_entries()
+ for file_diff, change_obj in diff_obj.get_change_sets().items():
+ if change_obj.is_change_mode(ChangeMode.COPY):
+ # Copy the original entry pointed by change_obj.orig_file into
+ # a new key in the patched report named change_obj.dst_file,
+ # that here is file_diff variable content, because this
+ # change_obj is pushed into the change_sets with the
+ # change_obj.dst_file key
+ if change_obj.orig_file in self.__entries.keys():
+ for entry in self.__entries[change_obj.orig_file]:
+ patched_report.add_entry(file_diff,
+ entry.line_number,
+ entry.text)
+
+ # Only files that have entries in the patched report need adjusting
+ if file_diff in patched_rep_entries.keys():
+ if change_obj.is_change_mode(ChangeMode.DELETE):
+ # No need to check changes here, just remember to delete
+ # the file from the report
+ remove_files.append(file_diff)
+ continue
+ elif change_obj.is_change_mode(ChangeMode.RENAME):
+ # Remember to rename the file entry on this report
+ rename_files.append(change_obj)
+
+ # Apply the line changes one at a time, in the order the parser
+ # registered them
+ # NOTE(review): remove_entry() can drop the whole file key from the
+ # report; if further changes follow for the same file, the lookup
+ # patched_rep_entries[file_diff] below would then raise KeyError --
+ # verify.
+ for line_num, change_type in change_obj.get_change_set():
+ len_rep = len(patched_rep_entries[file_diff])
+ for i in range(len_rep):
+ rep_item = patched_rep_entries[file_diff][i]
+ if change_type == ChangeSet.ChangeType.REMOVE:
+ if rep_item.line_number == line_num:
+ # This line is removed by this change,
+ # append to the list of entries to be removed
+ remove_entry.append(rep_item)
+ elif rep_item.line_number > line_num:
+ # Entry is below the removed line: it moves up by one
+ rep_item.line_number -= 1
+ elif change_type == ChangeSet.ChangeType.ADD:
+ # Entry is on or below the added line: it moves down by one
+ if rep_item.line_number >= line_num:
+ rep_item.line_number += 1
+ # Remove deleted entries from the list
+ if len(remove_entry) > 0:
+ for entry in remove_entry:
+ patched_report.remove_entry(entry.file_path,
+ entry.line_id)
+ # Empty the work list in place so it can be reused
+ del remove_entry[:]
+
+ # Drop the entries of the files deleted by the diff
+ if len(remove_files) > 0:
+ for file_name in remove_files:
+ patched_report.remove_entries(file_name)
+
+ # Move the entries of renamed files under their new file name
+ # NOTE(review): pop() raises KeyError if the origin key is gone by now
+ # (e.g. all its entries were removed above) -- verify.
+ if len(rename_files) > 0:
+ for change_obj in rename_files:
+ patched_rep_entries[change_obj.dst_file] = \
+ patched_rep_entries.pop(change_obj.orig_file)
+
+ return patched_report
+
def to_list(self):
# type: () -> list
report_list = []
new file mode 100644
@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+
+import re
+import sys
+
+try:
+ from enum import Enum
+except Exception:
+ if sys.version_info[0] == 2:
+ print("Please install enum34 package when using python 2.")
+ else:
+ print("Please use python version 3.5 or above.")
+ sys.exit(1)
+
+try:
+ from typing import Tuple
+except Exception:
+ if sys.version_info[0] == 2:
+ print("Please install typing package when using python 2.")
+ else:
+ print("Please use python version 3.5 or above.")
+ sys.exit(1)
+
+
+class UnifiedFormatParseError(Exception):
+    """Raised when a diff cannot be read or the parser input is invalid."""
+
+
+class ParserState(Enum):
+ """States of the line-by-line parser in UnifiedFormatParser."""
+ # Scanning for a "diff --git a/<path> b/<path>" header
+ FIND_DIFF_HEADER = 0
+ # Inside a hunk: registering the added and removed lines
+ REGISTER_CHANGES = 1
+ # After a diff header or a completed hunk: looking for file mode lines,
+ # the next hunk header or the next diff header
+ FIND_HUNK_OR_DIFF_HEADER = 2
+
+
+class ChangeSet(object):
+    """Changes that a diff applies to one file.
+
+    Holds the origin and destination file names, the kind of operation on
+    the file as a whole (change mode) and the ordered list of line changes
+    as (line number, change type) tuples.
+    """
+
+    class ChangeType(Enum):
+        """Kind of a single line change."""
+        REMOVE = 0
+        ADD = 1
+
+    class ChangeMode(Enum):
+        """Operation performed on the file as a whole."""
+        NONE = 0
+        CHANGE = 1
+        RENAME = 2
+        DELETE = 3
+        COPY = 4
+
+    def __init__(self, a_file, b_file):
+        # type: (str, str) -> None
+        """Create an empty change set from a_file to b_file, mode NONE."""
+        self.__changes = []
+        self.change_mode = ChangeSet.ChangeMode.NONE
+        self.orig_file = a_file
+        self.dst_file = b_file
+
+    def __str__(self):
+        # type: () -> str
+        """Return '<mode>: <orig> -> <dst>:' followed by the change list."""
+        header = "{}: {} -> {}:".format(
+            str(self.change_mode), self.orig_file, self.dst_file
+        )
+        return header + "\n" + str(self.__changes) + "\n"
+
+    def set_change_mode(self, change_mode):
+        # type: (ChangeMode) -> None
+        """Set the operation performed on the file as a whole."""
+        self.change_mode = change_mode
+
+    def is_change_mode(self, change_mode):
+        # type: (ChangeMode) -> bool
+        """Tell whether the current change mode equals change_mode."""
+        return change_mode == self.change_mode
+
+    def add_change(self, line_number, change_type):
+        # type: (int, ChangeType) -> None
+        """Append one line change, keeping the registration order."""
+        self.__changes.append((line_number, change_type))
+
+    def get_change_set(self):
+        # type: () -> list
+        """Return the list of (line number, change type) tuples."""
+        return self.__changes
+
+
+class UnifiedFormatParser(object):
+    """Parse a git unified-format diff into one ChangeSet per file.
+
+    The parsed change sets are stored in a dictionary keyed by file name and
+    are available through get_change_sets().
+    """
+
+    def __init__(self, args):
+        # type: (str | list) -> None
+        """Load the diff and parse it.
+
+        args is either the path of a patch file (str), which is read from
+        disk, or a list with the lines of a diff. In the latter case the
+        diff path is reported as "git-diff-local.txt".
+
+        Raises UnifiedFormatParseError when the file can't be read or args
+        has any other type.
+        """
+        if isinstance(args, str):
+            self.__diff_file = args
+            try:
+                with open(self.__diff_file, "rt") as infile:
+                    self.__diff_lines = infile.readlines()
+            except (IOError, OSError) as e:
+                # IOError is named explicitly: on Python 2 open() raises
+                # IOError, which is not an OSError subclass there
+                raise UnifiedFormatParseError(
+                    "Issue with reading file {}: {}"
+                    .format(self.__diff_file, e)
+                )
+        elif isinstance(args, list):
+            self.__diff_file = "git-diff-local.txt"
+            self.__diff_lines = args
+        else:
+            raise UnifiedFormatParseError(
+                "UnifiedFormatParser constructor called with wrong arguments")
+
+        self.__git_diff_header = re.compile(r'^diff --git a/(.*) b/(.*)$')
+        # The line counts are optional: the unified format omits ",<count>"
+        # when a hunk side is exactly one line long (e.g. "@@ -5 +5 @@")
+        self.__git_hunk_header = \
+            re.compile(r'^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@.*$')
+        self.__diff_set = {}
+        self.__parse()
+
+    def get_diff_path(self):
+        # type: () -> str
+        """Return the path (or placeholder name) of the parsed diff."""
+        return self.__diff_file
+
+    def add_change_set(self, change_set):
+        # type: (ChangeSet) -> None
+        """Register change_set, unless its change mode is still NONE."""
+        if not change_set.is_change_mode(ChangeSet.ChangeMode.NONE):
+            if change_set.is_change_mode(ChangeSet.ChangeMode.COPY):
+                # Add copy change mode items using the dst_file key, because
+                # there might be other changes for the orig_file in this diff
+                self.__diff_set[change_set.dst_file] = change_set
+            else:
+                self.__diff_set[change_set.orig_file] = change_set
+
+    def __parse(self):
+        # type: () -> None
+        """Run the state machine over the diff lines, filling the diff set."""
+        def parse_diff_header(line):
+            # type: (str) -> ChangeSet | None
+            # Return a new ChangeSet if line is a "diff --git" header
+            change_item = None
+            diff_head = self.__git_diff_header.match(line)
+            if diff_head and diff_head.group(1) and diff_head.group(2):
+                change_item = ChangeSet(diff_head.group(1), diff_head.group(2))
+
+            return change_item
+
+        def parse_hunk_header(line):
+            # type: (str) -> Tuple[int, int, int]
+            # Return (first line of the hunk in the new file, number of
+            # lines on the old side, number of lines on the new side), or
+            # (-1, -1, -1) if line is not a hunk header
+            hunk_head = self.__git_hunk_header.match(line)
+            if not hunk_head:
+                return (-1, -1, -1)
+
+            a_count, b_start, b_count = hunk_head.groups()
+            # An omitted count means the hunk side has exactly one line
+            return (int(b_start),
+                    1 if a_count is None else int(a_count),
+                    1 if b_count is None else int(b_count))
+
+        file_linenum = 0
+        hunk_a_linemax = 0
+        hunk_b_linemax = 0
+        diff_elem = None
+        parse_state = ParserState.FIND_DIFF_HEADER
+        ChangeMode = ChangeSet.ChangeMode
+        ChangeType = ChangeSet.ChangeType
+
+        for line in self.__diff_lines:
+            if parse_state == ParserState.FIND_DIFF_HEADER:
+                diff_elem = parse_diff_header(line)
+                if diff_elem:
+                    # Found the diff header, go to the next stage
+                    parse_state = ParserState.FIND_HUNK_OR_DIFF_HEADER
+            elif parse_state == ParserState.FIND_HUNK_OR_DIFF_HEADER:
+                # Here only these change modalities will be registered:
+                # deleted file mode <mode>
+                # rename from <path>
+                # rename to <path>
+                # copy from <path>
+                # copy to <path>
+                #
+                # These will be ignored:
+                # old mode <mode>
+                # new mode <mode>
+                # new file mode <mode>
+                #
+                # Also these info will be ignored
+                # similarity index <number>
+                # dissimilarity index <number>
+                # index <hash>..<hash> <mode>
+                if line.startswith("deleted file"):
+                    # If the file is deleted, register it but don't go through
+                    # the changes that will be only a set of lines removed.
+                    # It must be registered right here: in the
+                    # FIND_DIFF_HEADER state diff_elem is overwritten by the
+                    # very next line.
+                    diff_elem.set_change_mode(ChangeMode.DELETE)
+                    self.add_change_set(diff_elem)
+                    diff_elem = None
+                    parse_state = ParserState.FIND_DIFF_HEADER
+                elif line.startswith("new file"):
+                    # If the file is new, skip it, as it doesn't give any
+                    # useful information on the report translation
+                    parse_state = ParserState.FIND_DIFF_HEADER
+                elif line.startswith("rename to"):
+                    # Renaming operation can be a pure renaming or a rename
+                    # and a set of change, so keep looking for the hunk
+                    # header
+                    diff_elem.set_change_mode(ChangeMode.RENAME)
+                elif line.startswith("copy to"):
+                    # This is a copy operation, mark it
+                    diff_elem.set_change_mode(ChangeMode.COPY)
+                else:
+                    # Look for the hunk header
+                    (file_linenum, hunk_a_linemax, hunk_b_linemax) = \
+                        parse_hunk_header(line)
+                    if file_linenum >= 0:
+                        if diff_elem.is_change_mode(ChangeMode.NONE):
+                            # The file has only changes
+                            diff_elem.set_change_mode(ChangeMode.CHANGE)
+                        parse_state = ParserState.REGISTER_CHANGES
+                    else:
+                        # ... or there could be a diff header
+                        new_diff_elem = parse_diff_header(line)
+                        if new_diff_elem:
+                            # Found a diff header, register the last change
+                            # item
+                            self.add_change_set(diff_elem)
+                            diff_elem = new_diff_elem
+            elif parse_state == ParserState.REGISTER_CHANGES:
+                if line.startswith("\\"):
+                    # "\ No newline at end of file" is a marker, not a line
+                    # of the file: it must not advance the line counter
+                    continue
+
+                if (hunk_b_linemax > 0) and line.startswith("+"):
+                    diff_elem.add_change(file_linenum, ChangeType.ADD)
+                    hunk_b_linemax -= 1
+                elif (hunk_a_linemax > 0) and line.startswith("-"):
+                    diff_elem.add_change(file_linenum, ChangeType.REMOVE)
+                    hunk_a_linemax -= 1
+                    # A removed line doesn't exist in the new file: cancel
+                    # the increment done at the end of this iteration
+                    file_linenum -= 1
+                elif ((hunk_a_linemax + hunk_b_linemax) > 0) and \
+                        line.startswith(" "):
+                    # A context line counts on both sides of the hunk
+                    hunk_a_linemax -= 1 if (hunk_a_linemax > 0) else 0
+                    hunk_b_linemax -= 1 if (hunk_b_linemax > 0) else 0
+
+                if (hunk_a_linemax + hunk_b_linemax) <= 0:
+                    # Hunk fully consumed
+                    parse_state = ParserState.FIND_HUNK_OR_DIFF_HEADER
+
+                file_linenum += 1
+
+        # Register the change set still pending at the end of the diff
+        if diff_elem is not None:
+            self.add_change_set(diff_elem)
+
+    def get_change_sets(self):
+        # type: () -> dict
+        """Return the dictionary of parsed ChangeSet objects by file name."""
+        return self.__diff_set