#!/usr/bin/env bash
# SPDX-License-Identifier: Apache-2.0
# Copyright (C) 2021 Arm Limited or its affiliates and Contributors. All rights reserved.

# Setting TRACE to any non-empty value turns on command tracing.
if [[ -n "$TRACE" ]]; then
    set -x
fi
# Abort on a failing command, on use of an unset variable, and when any stage
# of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Different programs can have different ideas about what constitutes 'sorted'
# (this particularly affects the 'comm' / 'jq' combination), so pin the
# collation order for everything started from this script.
LC_COLLATE=C
export LC_COLLATE

# Version suffix used by create_link_script and by the ccache compiler check.
# A value supplied by the caller (even an empty one) is kept; -12 is used only
# when the variable is unset.
: "${BUILDER_CLANG_VERSION_SUFFIX=-12}"

awk_inplace() {
    # Run awk with the given arguments and replace the contents of the last
    # argument (a file) with awk's output.
    #
    # Arguments: passed to awk unchanged; the final argument is both awk's last
    #            input file and the file that gets rewritten.
    # Returns:   0 on success. If awk or the write-back fails, the temporary
    #            file is removed and the failing status is returned; when awk
    #            itself fails the target is left untouched.
    local target tmp status=0

    if (( $# < 2 )); then
        echo "awk_inplace: expected an awk program and a file to rewrite" >&2
        return 2
    fi
    # ${!#} is the last positional parameter.
    target="${!#}"

    tmp="$(mktemp)" || return

    # The target is only written once awk has produced its complete output.
    # The result is copied back with cat instead of moving the temporary file
    # over the target: mktemp creates owner-only files, so a move would
    # silently change the target's permissions.
    { awk "$@" > "$tmp" && cat -- "$tmp" > "$target"; } || status=$?

    rm -f -- "$tmp"
    return "$status"
}

patch_clang_version() {
    # Bake the commit SHA ($1) into the clang version by defining LLVM_REVISION
    # (as the quoted SHA) and CLANG_REVISION (as LLVM_REVISION) in Version.cpp.
    # The #defines go right after the last line containing #include, which
    # seems reasonably safe but could be confused by, for example, an #include
    # wrapped in #if ... #endif.
    local version_cpp=../clang/lib/Basic/Version.cpp
    local insert_defines='FNR==NR{ if (/#include/) p=NR; next} 1; FNR==p{ print "#define LLVM_REVISION " commit; print "#define CLANG_REVISION LLVM_REVISION"; }'

    # The file is listed twice: the first pass records the line number of the
    # last #include, the second pass prints every line and emits the #defines
    # after that line.
    awk_inplace -c -v commit="\"$1\"" "$insert_defines" "$version_cpp" "$version_cpp"
}

patch_llvm_version() {
    # Same idea as for clang, but for the LLVM tools: define LLVM_VERSION_INFO
    # as the quoted commit SHA ($1) right after the last line containing
    # #include in CommandLine.cpp.
    local command_line_cpp=../llvm/lib/Support/CommandLine.cpp
    local insert_define='FNR==NR{ if (/#include/) p=NR; next} 1; FNR==p{ print "#define LLVM_VERSION_INFO " commit }'

    # First pass over the file finds the last #include, second pass prints the
    # file with the #define added after it.
    awk_inplace -c -v commit="\"$1\"" "$insert_define" "$command_line_cpp" "$command_line_cpp"
}

default_target_binaries() {
    # Print, one per line, each bin/<file> that shows up as ` -o bin/<file> `
    # in the commands listed by 'ninja -t commands', i.e. the commands behind
    # what you get when you type 'ninja'.
    local extract_output='s|^.* -o (bin/[^ ]+) .*$|\1|p'

    ninja -t commands \
        | sed -E -n "$extract_output"
}

default_target_rules() {
    # Print the rule of every build statement of the form
    #   build bin/<file>: <rule> ...
    # in build.ninja whose output is one of the default target binaries.
    #
    # The binary names are compared as literal strings instead of being pasted
    # into a regular expression, so names containing regex metacharacters (for
    # example bin/clang++) are matched exactly, and an empty list of binaries
    # simply yields no output.
    #
    # FILENAME == ARGV[1] identifies lines from the list of binaries; unlike
    # the NR == FNR idiom this stays correct when that list is empty.
    awk '
        FILENAME == ARGV[1] { wanted[$0 ":"] = 1; next }
        $1 == "build" && ($2 in wanted) { print $3 }
    ' <(default_target_binaries) build.ninja
}

library_rules() {
    # Print the ninja rules whose names mark them as C or C++ static- or
    # shared-library link rules. There is no need to be too selective here:
    # extra rules are not harmful, since they only have an effect if used by a
    # binary.
    local library_linker='C(XX)?_(STATIC|SHARED)_LIBRARY_LINKER'

    ninja -t rules \
        | grep -E "$library_linker"
}

symlink_rules() {
    # Print, on a single space-separated line, the rules which produce
    # symlinks.
    printf '%s %s %s\n' \
        CMAKE_SYMLINK_EXECUTABLE \
        CMAKE_SYMLINK_LIBRARY \
        CUSTOM_COMMAND
}

create_link_script() {
    # Write to stdout a script which can link the binaries.
    #
    # The difficulty is deciding what to include in the link script. Taking
    # every executable is not an option, since some of them may be
    # unused/untested and break the build for long periods of time. Instead
    # the compilation database is limited to the rules behind the default
    # target binaries plus the library and symlink rules, and that database is
    # piped through compdb2line.

    COMPDB2LINE_ARGS=()
    COMPDB2LINE_ARGS+=("-link-cmd-suffix=${BUILDER_CLANG_VERSION_SUFFIX}")
    # Drop these, which point to the build directory and aren't used for
    # linking.
    COMPDB2LINE_ARGS+=('-prune=-fmacro-prefix-map=' '-prune=-ffile-prefix-map=')
    # Version scripts have absolute paths.
    COMPDB2LINE_ARGS+=('-prune=-Wl,--version-script,')
    # The dollar sign is escaped, so the text $LINKSCRIPT_LLD is passed on
    # literally rather than being expanded here.
    COMPDB2LINE_ARGS+=("-replacement=-fuse-ld=:-fuse-ld=\$LINKSCRIPT_LLD")
    COMPDB2LINE_ARGS+=('link-script')

    # Word splitting of the rule lists is intentional: every rule name has to
    # become an argument of its own.
    # shellcheck disable=SC2046
    ninja -t compdb \
        $(default_target_rules) \
        $(library_rules) \
        $(symlink_rules) \
        | compdb2line "${COMPDB2LINE_ARGS[@]}"
}

object_compdb() {
    # Print the compilation database for all object files, i.e. for every
    # ninja rule whose name starts with C_COMPILER__ or CXX_COMPILER__.
    local compiler_rule='^C(XX)?_COMPILER__'

    ninja -t rules \
        | grep -E "$compiler_rule" \
        | xargs ninja -t compdb
}

build_system_is_clean() {
    # Succeed when a dry run ('ninja -n') of the build.ninja target reports
    # 'ninja: no work to do.' on stdout or stderr. Nothing is printed; only
    # the exit status matters.
    ninja -n build.ninja 2>&1 \
        | grep 'ninja: no work to do.' > /dev/null 2>&1
}

main() {
    # Configure (when needed) and build the LLVM checkout that lives one
    # directory above the current (build) directory.
    #
    # Arguments:   $1 - commit SHA to bake into the clang version; defaults to
    #                   the output of 'git rev-parse HEAD'.
    # Environment: MANYCLANGS_BUILD_DRYRUN - if non-empty, skip the build and
    #                   pretend that it failed.
    #              BUILDER_CLANG_VERSION_SUFFIX - used for the ccache compiler
    #                   check.
    # Outputs:     build.log, which is kept only when the build did not
    #              succeed; LICENSE-*.txt and README in the build directory.
    # Exits with status 1 on a dry run or when the build fails.
    local commit_sha rel_llvm_path abs_base_path rel_base_path
    local expected_objects ninja_cmd

    commit_sha="${1-$(git rev-parse HEAD)}"
    echo "Building $commit_sha" > build.log

    if [[ -n "${MANYCLANGS_BUILD_DRYRUN-}" ]]; then
        echo "Dry run, pretending that the build failed."
        echo "\$MANYCLANGS_BUILD_DRYRUN is set, build skipped" > build.log
        exit 1
    fi

    if [[ -e CMakeCache.txt ]] && ! build_system_is_clean; then
        # Build system is out of date. Rebuild from empty to avoid any
        # possibility of cache contamination.
        rm -rf CMakeCache.txt cmake
        # -prune keeps find from descending into directories which are about
        # to be removed; otherwise rm can delete a directory while find is
        # still walking it, which makes find report errors and fail.
        find . \( -iname '*.cmake' -o -iname CMakeFiles \) -prune -exec rm -rf {} +
    fi

    if [[ ! -e CMakeCache.txt ]]; then
        rel_llvm_path=../llvm/
        abs_base_path="$(realpath "$rel_llvm_path/../")/"
        rel_base_path="$(realpath --relative-to=. "$abs_base_path")/"

        command time -f 'Configured with manyclangs-cmake in %E' manyclangs-cmake "${rel_llvm_path}" |& tee -a build.log

        # [determinism] Prevent absolute build path from leaking into llvm-config build variables.
        echo '#define LLVM_SRC_ROOT "/llvm-project/llvm"' >> ./tools/llvm-config/BuildVariables.inc
        echo '#define LLVM_OBJ_ROOT "/llvm-project/build"' >> ./tools/llvm-config/BuildVariables.inc

        # Patch build rules so that the compiler is only fed with relative
        # paths. This is necessary to work around the following issues:
        # * https://github.com/elfshaker/elfshaker/issues/11
        # * https://bugs.llvm.org/show_bug.cgi?id=52360
        # ("__PRETTY_FUNCTION__ leaks absolute paths even with -fmacro-prefix-map").
        #
        # NOTE(review): gsub treats the absolute base path as a regular
        # expression, so a checkout path containing regex metacharacters such
        # as '+' would not be rewritten.

        # shellcheck disable=SC2016
        awk_inplace -c -v "search=$abs_base_path" -v "replace=$rel_base_path" '
            /^  INCLUDES = / { gsub(" -I"search, " -I"replace, $0); print; next }
            /^build.*C(XX)?_COMPILER/ { gsub(" "search, " "replace, $0); print; next }
            { print }
        ' build.ninja

        # Do this before rewriting TARGET_FILE.
        # Create the link script.
        create_link_script > ./link.sh
        chmod +x link.sh

        # Avoid writing binaries out to disk, saves disk space when we're writing to tmpfs.
        sed -E '/TARGET_FILE = bin\/(clang-ast-dump|(clang|llvm)(-min)?-tblgen|clang-tidy-confusable-chars-gen|clang-pseudo-gen)/!s|TARGET_FILE = bin/.*|TARGET_FILE = /dev/null|' build.ninja > build.patched.ninja
    fi

    patch_clang_version "$commit_sha"

    cp ../llvm/LICENSE.TXT ./LICENSE-llvm.txt
    cp ../clang/LICENSE.TXT ./LICENSE-clang.txt
    {
        echo 'These builds were done as part of the elfshaker/manyclangs project,'
        echo 'please see https://github.com/elfshaker/manyclangs for more information.'
    } > ./README

    # patch_llvm_version invalidates llvm-tblgen and causes builds to run
    # slower! We may be able to work around this by having a separate source
    # tree for tblgen and setting LLVM_TABLEGEN and CLANG_TABLEGEN. That way the
    # TABLEGEN build would not be invalidated. Unsure though if tablegen were
    # the only thing invalidated in that circumstance the build system would
    # correctly rerun.

    # patch_llvm_version "$(git rev-parse HEAD)"

    # Delete object files which aren't outputs of the build system. The list
    # of expected outputs is computed up front so that a failure of ninja or
    # jq aborts here instead of making every existing object look stale.
    expected_objects="$(object_compdb | jq -r '[.[].output] | sort | .[]')" || {
        echo "Unable to determine the expected object files." >&2
        exit 1
    }
    # Paths are handled one per line so that names containing spaces or
    # quotes reach rm intact.
    comm -23 \
        <(find . -iname '*.o' | sed 's|^\./||' | sort) \
        <(printf '%s\n' "$expected_objects") \
        | xargs -d '\n' --no-run-if-empty rm -v --

    # Clobber the resource directories since they're cheap to remake.
    rm -rf lib/clang

    # Use a jobclient-enabled ninja, if available.
    ninja_cmd=ninja
    if command -v ninja-jobclient &> /dev/null; then
        ninja_cmd=ninja-jobclient
    fi

    # Use FILECLONE if possible to save disk space/bandwidth.
    # CCACHE_SLOPPINESS=time_macros is because the clang sources contain
    # __TIME__ etc in order to implement them in the compiler, but do not
    # actually use them. include_file_ctime,include_file_mtime are because we
    # know no-one else will be modifying header files which may be newly checked
    # out.
    if time CCACHE_FILECLONE=1 \
        CCACHE_NOCOMPRESS=1 \
        CCACHE_SLOPPINESS=time_macros,include_file_ctime,include_file_mtime \
        CCACHE_COMPILERCHECK="string:manyclangs-clang${BUILDER_CLANG_VERSION_SUFFIX}" \
        "${ninja_cmd}" -f build.patched.ninja |& tee -a build.log
    then
        # On build success, the log is not very interesting, so delete it.
        rm build.log
    else
        echo Build failed.
        exit 1
    fi
}

# Entry point: forward the script's arguments (an optional commit SHA) to main.
main "$@"
