Signal-iOS/Scripts/precommit.py

#!/usr/bin/env python3

import os
import sys
import subprocess
import datetime
import argparse
from typing import Iterable
from pathlib import Path


# File extensions (including the leading dot) that this script will touch.
EXTENSIONS_TO_CHECK = {
    ".h",
    ".hpp",
    ".cpp",
    ".m",
    ".mm",
    ".pch",
    ".swift",
}


# Absolute path of the enclosing git work tree, resolved once at import time.
# A failing `git rev-parse` raises subprocess.CalledProcessError.
git_repo_path = os.path.abspath(
    subprocess.run(
        ["git", "rev-parse", "--show-toplevel"],
        check=True,
        stdout=subprocess.PIPE,
        text=True,
    ).stdout.strip()
)


class include:
    """One parsed ``#include`` / ``#import`` directive.

    The attributes are plain and mutable so callers can rewrite them before
    formatting the directive back to text.
    """

    def __init__(self, isInclude, isQuote, body, comment):
        # True renders "#include", False renders "#import".
        self.isInclude = isInclude
        # True renders "body" in double quotes, False renders <body>.
        self.isQuote = isQuote
        # The path between the delimiters.
        self.body = body
        # Whatever followed the closing delimiter; may be None or empty.
        self.comment = comment

    def format(self):
        """Return the directive as a single normalized source line.

        The body and the trailing comment are stripped of surrounding
        whitespace; a missing (None) or blank comment is omitted.
        """
        keyword = "#include" if self.isInclude else "#import"
        opener, closer = ('"', '"') if self.isQuote else ("<", ">")
        result = f"{keyword} {opener}{self.body.strip()}{closer}"

        # Tolerate comment=None instead of raising AttributeError on .strip().
        trailing = (self.comment or "").strip()
        if trailing:
            result += " " + trailing
        return result


def is_include_or_import(line):
    """Return True when *line*, ignoring surrounding whitespace, begins with
    ``#include `` or ``#import `` (keyword followed by a space)."""
    return line.strip().startswith(("#include ", "#import "))


def parse_include(line):
    """Parse one ``#include`` / ``#import`` line into an ``include`` object.

    Returns None for a blank line or a line that is exactly ``//``. Any other
    line that is not a well-formed directive prints a diagnostic and
    terminates the process with exit status 1.
    """

    def fail(offending_text):
        print("Unexpected import or include: " + offending_text)
        sys.exit(1)

    remainder = line.strip()
    if remainder in ("", "//"):
        return None

    for keyword, keyword_is_include in (("#include ", True), ("#import ", False)):
        if remainder.startswith(keyword):
            isInclude = keyword_is_include
            remainder = remainder[len(keyword) :]
            break
    else:
        fail(line)

    # The opening delimiter decides which closing delimiter to look for.
    closing_delimiters = {'"': '"', "<": ">"}
    opener = remainder[:1]
    if opener not in closing_delimiters:
        fail(remainder)

    endIndex = remainder.find(closing_delimiters[opener], 1)
    if endIndex < 0:
        fail(line)

    # Everything after the closing delimiter is kept as the trailing comment.
    return include(
        isInclude,
        opener == '"',
        remainder[1:endIndex],
        remainder[endIndex + 1 :],
    )


def parse_includes(text):
    """Run parse_include over every line of *text* and return the truthy
    results in their original order."""
    parsed = (parse_include(raw_line) for raw_line in text.split("\n"))
    return [entry for entry in parsed if entry]


def sort_include_block(text, filepath, filename, file_extension):
    """Return *text* (a run of #include/#import lines) normalized and sorted.

    Every directive is rewritten to use a single keyword chosen from the
    file type: ``#include`` for c/cpp/hpp files, ``#import`` for everything
    else. Directives whose basename (without extension) equals that of
    *filename* come first; the rest are grouped as quoted includes, angle
    includes, quoted imports, angle imports. Each group is de-duplicated and
    sorted, groups are joined by a newline, and the result ends with a
    newline.

    *file_extension* may be given with or without its leading dot
    (``".m"`` or ``"m"``). *filepath* is accepted for signature
    compatibility with the other sort functions and is not used.
    """
    includes = parse_includes(text)

    # Normalize away the leading dot so os.path.splitext-style extensions
    # (".cpp") are recognized. The previous `in ("m")` check was a substring
    # test against a plain string and only ever re-applied the default.
    extension = file_extension.lower().lstrip(".")
    use_include_keyword = extension in ("c", "cpp", "hpp")
    for entry in includes:
        entry.isInclude = use_include_keyword

    own_basename = os.path.splitext(filename)[0]

    def is_matching_header(entry):
        included_basename = os.path.splitext(os.path.basename(entry.body))[0]
        return included_basename == own_basename

    # Make sure the matching header is first.
    matching_header_includes = [e for e in includes if is_matching_header(e)]
    other_includes = [e for e in includes if not is_matching_header(e)]

    def formatBlock(entries):
        # A set removes duplicate directives before sorting.
        return "\n".join(sorted({entry.format() for entry in entries}))

    groups = (
        matching_header_includes,
        [e for e in other_includes if e.isInclude and e.isQuote],
        [e for e in other_includes if e.isInclude and not e.isQuote],
        [e for e in other_includes if not e.isInclude and e.isQuote],
        [e for e in other_includes if not e.isInclude and not e.isQuote],
    )
    blocks = [formatBlock(group) for group in groups if group]

    return "\n".join(blocks) + "\n"


def sort_forward_decl_statement_block(text, filepath, filename, file_extension):
    """Return the non-blank lines of *text*, stripped, de-duplicated and
    sorted, wrapped in one leading and one trailing newline.

    The path, filename and extension arguments are not used.
    """
    unique_statements = {raw_line.strip() for raw_line in text.split("\n")}
    unique_statements.discard("")
    return "\n" + "\n".join(sorted(unique_statements)) + "\n"


def find_matching_section(text, match_test):
    """Locate the first run of lines in *text* accepted by *match_test*.

    The run starts at the first matching line, extended upwards over any
    directly preceding blank lines, and continues (blank lines included)
    until the first non-blank line that *match_test* rejects.

    Returns None when no line matches; otherwise a tuple
    ``(before, section, after)`` of newline-joined text, where ``after`` is
    None if the section runs to the end of *text*.
    """
    lines = text.split("\n")

    start = next(
        (index for index, line in enumerate(lines) if match_test(line)), None
    )
    if start is None:
        return None

    # Absorb any leading empty lines.
    while start > 0 and not lines[start - 1].strip():
        start -= 1

    # Blank lines never end the section, so trailing ones are absorbed too.
    end = next(
        (
            index
            for index in range(start, len(lines))
            if lines[index].strip() and not match_test(lines[index])
        ),
        None,
    )

    before = "\n".join(lines[:start])
    if end is None:
        return before, "\n".join(lines[start:]), None
    return before, "\n".join(lines[start:end]), "\n".join(lines[end:])


def sort_matching_blocks(
    sort_name, filepath, filename, file_extension, text, match_func, sort_func
):
    """Rewrite every section of *text* selected by *match_func*.

    The text is scanned front to back with find_matching_section. Text ahead
    of each section is copied through unchanged, the section itself is
    replaced by ``sort_func(section, filepath, filename, file_extension)``,
    and the pieces are joined with newlines. If the result differs from
    *text*, *sort_name* and *filepath* are printed. Returns the result.
    """
    result = None
    remaining = text

    while True:
        section = find_matching_section(remaining, match_func)
        if not section:
            # Nothing further to sort: keep the tail as it is.
            result = result + "\n" + remaining if result else remaining
            break

        before, block, after = section
        result = result + "\n" + before if result else before
        result = result + "\n" + sort_func(block, filepath, filename, file_extension)

        if not after:
            break
        remaining = after

    if text != result:
        print(sort_name, filepath)
    return result


def find_forward_class_statement_section(text):
    """Return find_matching_section's result for the first run of lines in
    *text* that start (after stripping) with ``@class ``."""
    return find_matching_section(
        text, lambda candidate: candidate.strip().startswith("@class ")
    )


def find_forward_protocol_statement_section(text):
    """Return find_matching_section's result for the first run of lines in
    *text* that, once stripped, start with ``@protocol `` and end with ``;``."""

    def looks_like_forward_protocol(candidate):
        stripped = candidate.strip()
        return stripped.startswith("@protocol ") and stripped.endswith(";")

    return find_matching_section(text, looks_like_forward_protocol)


def find_include_section(text):
    """Return find_matching_section's result for the first run of
    ``#include`` / ``#import`` lines in *text*."""
    return find_matching_section(text, is_include_or_import)


def sort_includes(filepath, filename, file_extension, text):
    """Sort the include/import sections of *text* for .h, .m and .mm files;
    text for any other extension is returned untouched."""
    if file_extension in (".h", ".m", ".mm"):
        return sort_matching_blocks(
            sort_name="sort_includes",
            filepath=filepath,
            filename=filename,
            file_extension=file_extension,
            text=text,
            match_func=find_include_section,
            sort_func=sort_include_block,
        )
    return text


def sort_forward_class_statements(filepath, filename, file_extension, text):
    """Sort the ``@class`` forward-declaration sections of *text* for .h, .m
    and .mm files; text for any other extension is returned untouched."""
    if file_extension in (".h", ".m", ".mm"):
        return sort_matching_blocks(
            sort_name="sort_class_statements",
            filepath=filepath,
            filename=filename,
            file_extension=file_extension,
            text=text,
            match_func=find_forward_class_statement_section,
            sort_func=sort_forward_decl_statement_block,
        )
    return text


def sort_forward_protocol_statements(filepath, filename, file_extension, text):
    """Sort the ``@protocol`` forward-declaration sections of *text* for .h,
    .m and .mm files; text for any other extension is returned untouched."""
    if file_extension in (".h", ".m", ".mm"):
        return sort_matching_blocks(
            sort_name="sort_forward_protocol_statements",
            filepath=filepath,
            filename=filename,
            file_extension=file_extension,
            text=text,
            match_func=find_forward_protocol_statement_section,
            sort_func=sort_forward_decl_statement_block,
        )
    return text


def get_ext(file: str) -> str:
    """Return the extension of *file* as reported by os.path.splitext
    (leading dot included, empty string when there is none)."""
    _, extension = os.path.splitext(file)
    return extension


def process(filepath):
    """Normalize one source file in place.

    Runs the include, ``@class`` and ``@protocol`` sorters over the file's
    text, then replaces the leading ``//`` comment lines (after an optional
    ``#!`` first line) with the standard copyright header for the current
    year. The file is rewritten, and its repo-relative path printed, only
    when the resulting text differs from what was read.

    Raises:
        Exception: if the file's basename starts with a dot.
    """
    # relpath stays meaningful even for paths outside the repository, where
    # slicing off a fixed-length prefix would produce a mangled name.
    short_filepath = os.path.relpath(filepath, git_repo_path)

    filename = os.path.basename(filepath)
    if filename.startswith("."):
        raise Exception("shouldn't call process with dotfile")
    file_ext = get_ext(filename)

    # Read and write as UTF-8 explicitly so the result does not depend on the
    # locale of whoever runs the hook.
    with open(filepath, "rt", encoding="utf-8") as f:
        text = f.read()

    original_text = text

    text = sort_includes(filepath, filename, file_ext, text)
    text = sort_forward_class_statements(filepath, filename, file_ext, text)
    text = sort_forward_protocol_statements(filepath, filename, file_ext, text)

    lines = text.split("\n")

    # Keep a shebang on the first line, ahead of the header.
    shebang = ""
    if lines[0].startswith("#!"):
        shebang = lines[0] + "\n"
        lines = lines[1:]

    # Drop the existing leading comment block; it is regenerated below.
    while lines and lines[0].startswith("//"):
        lines = lines[1:]
    text = "\n".join(lines)
    text = text.strip()

    header = """//
//  Copyright (c) %s Open Whisper Systems. All rights reserved.
//

""" % (
        datetime.datetime.now().year,
    )
    text = shebang + header + text + "\n"

    if original_text == text:
        return

    print("Updating:", short_filepath)

    with open(filepath, "wt", encoding="utf-8") as f:
        f.write(text)


def get_file_paths_in(path: str) -> Iterable[str]:
    """Yield the absolute path of every file found by os.walk under *path*."""
    for directory, _subdirs, names in os.walk(path):
        yield from (
            os.path.abspath(os.path.join(directory, name)) for name in names
        )


def get_file_paths_for_commands(commands: Iterable[list[str]]) -> Iterable[str]:
    """Run each command and yield the existing paths that it prints.

    Every command is executed with subprocess.check_output and is expected
    to print one repo-relative path per line. Each path is resolved against
    the repository root and yielded as an absolute path if it exists on
    disk.

    Raises:
        subprocess.CalledProcessError: if a command exits non-zero.
    """
    for command in commands:
        output = subprocess.check_output(command, text=True)
        for line in output.split("\n"):
            # Skip blank lines (there is always one after the final newline):
            # joined with the repo root they resolve to the root directory
            # itself, which exists and would be yielded as if it were a file.
            if not line:
                continue
            file_path = os.path.abspath(os.path.join(git_repo_path, line))
            if os.path.exists(file_path):
                yield file_path


def should_process_file(file_path: str) -> bool:
    """Return True when *file_path* has a checked extension and none of its
    path components is hidden, a ``.framework`` bundle, or a vendored
    directory (Pods, ThirdParty, Carthage)."""
    if get_ext(file_path) not in EXTENSIONS_TO_CHECK:
        return False

    excluded_directories = ("Pods", "ThirdParty", "Carthage")
    return not any(
        component.startswith(".")
        or component.endswith(".framework")
        or component in excluded_directories
        for component in Path(file_path).parts
    )


def lint_swift_files(file_paths: set[str]) -> None:
    """Run swiftlint over the ``.swift`` entries of *file_paths*.

    The files are handed to swiftlint through the SCRIPT_INPUT_FILE_COUNT /
    SCRIPT_INPUT_FILE_<n> environment variables. swiftlint runs twice, first
    with ``--fix`` and then without; the output of each run is printed, also
    when swiftlint exits non-zero. Does nothing when there are no Swift
    files.
    """
    swift_file_paths = [path for path in file_paths if get_ext(path) == ".swift"]
    if not swift_file_paths:
        return

    env = os.environ.copy()
    env["SCRIPT_INPUT_FILE_COUNT"] = str(len(swift_file_paths))
    env.update(
        {
            f"SCRIPT_INPUT_FILE_{index}": path
            for index, path in enumerate(swift_file_paths)
        }
    )

    # First pass applies fixes, second pass reports what is left.
    for extra_arguments in (["--fix"], []):
        command = ["swiftlint", "lint", *extra_arguments, "--use-script-input-files"]
        try:
            lint_output = subprocess.check_output(command, env=env, text=True)
        except subprocess.CalledProcessError as error:
            lint_output = error.output
        print(lint_output)


def check_diff_for_keywords():
    """Print a warning when the staged diff mentions assertion/crash helpers.

    Pipes ``git diff --staged`` through ``grep -E`` with an alternation of
    the keywords below and prints any matches with three lines of context.

    Raises:
        subprocess.CalledProcessError: if the pipeline exits with a status
            other than 0 (matches) or 1 (no matches).
    """
    # Raw strings: "\(" in a normal literal is an invalid escape sequence and
    # triggers a SyntaxWarning on current Python versions. The resulting
    # pattern text is unchanged.
    objc_keywords = [
        r"OWSAbstractMethod\(",
        r"OWSAssert\(",
        r"OWSCAssert\(",
        r"OWSFail\(",
        r"OWSCFail\(",
        r"ows_add_overflow\(",
        r"ows_sub_overflow\(",
    ]

    swift_keywords = [
        r"owsFail\(",
        r"precondition\(",
        r"fatalError\(",
        r"dispatchPrecondition\(",
        r"preconditionFailure\(",
        r"notImplemented\(",
    ]

    matching_expression = "|".join(objc_keywords + swift_keywords)
    # The expression is built only from the constants above, so interpolating
    # it into a shell command line is safe.
    command_line = (
        'git diff --staged | grep --color=always -C 3 -E "%s"' % matching_expression
    )
    try:
        output = subprocess.check_output(command_line, shell=True, text=True)
    except subprocess.CalledProcessError as e:
        # > man grep
        #  EXIT STATUS
        #  The grep utility exits with one of the following values:
        #  0     One or more lines were selected.
        #  1     No lines were selected.
        #  >1    An error occurred.
        if e.returncode == 1:
            # no keywords in diff output
            return
        # some other error - bad grep expression?
        raise

    if output:
        print("⚠️  keywords detected in diff:")
        print(output)


if __name__ == "__main__":
    # Command-line entry point. File selection, in priority order:
    #   --all   every file under the repository root
    #   --path  every file under the given path
    #   --ref   files changed between the given ref and HEAD
    #   (none)  files with staged or unstaged changes
    parser = argparse.ArgumentParser(description="Precommit script.")
    parser.add_argument(
        "--all",
        action="store_true",
        help="process all files in or below current dir",
    )
    parser.add_argument("--path", help="used to specify a path to process.")
    parser.add_argument(
        "--ref",
        help="process all files that have changed since the given ref",
    )
    args = parser.parse_args()

    # Only --ref moves the commit that clang-format compares against.
    clang_format_commit = "HEAD"
    name_only = "--name-only"
    diff_filter = "--diff-filter=ACMR"

    candidate_paths: Iterable[str]
    if args.all:
        candidate_paths = get_file_paths_in(git_repo_path)
    elif args.path:
        candidate_paths = get_file_paths_in(args.path)
    elif args.ref:
        clang_format_commit = args.ref
        candidate_paths = get_file_paths_for_commands(
            [["git", "diff", name_only, diff_filter, args.ref, "HEAD"]]
        )
    else:
        candidate_paths = get_file_paths_for_commands(
            [
                ["git", "diff", "--cached", name_only, diff_filter],
                ["git", "diff", name_only, diff_filter],
            ]
        )

    file_paths = {path for path in candidate_paths if should_process_file(path)}

    lint_swift_files(file_paths)

    for file_path in file_paths:
        process(file_path)

    print("git clang-format...")
    # we don't want to format .proto files, so we specify every other supported extension
    clang_format_command = (
        'git clang-format --extensions "c,h,m,mm,cc,cp,cpp,c++,cxx,hh,hxx,cu,java,js,ts,cs" --commit %s'
        % clang_format_commit
    )
    print(subprocess.getoutput(clang_format_command))

    check_diff_for_keywords()