#!/usr/bin/env bash
#
# usage:
#   maint/cargo-crate-owners
#
# Lists the ownerships of all published crates to stderr,
# and checks that they're all the same.
#
# Exit status is
#   0      all crates have the same owners
#   2      some crates have varying owners
#   other  trouble
#
# Crates which are mentioned in the workspace, but which have never been published,
# are ignored.
#
#
# Override facility
#
# Sometimes a crate ownership invitation can remain unaccepted for reasons
# relating to the personal circumstances of the prospective owner.
# We have a facility for avoiding CI failure in this situation, without
# exposing Personally Identifying Information more widely than needed.
#
# To use this facility, in gitlab CI, create a "CI/CD variable"
# with the following properties:
#    - Type: File
#    - Environments: all (default)
#    - Visibility: Visible (default)
#    - Flags: [ ] Protect (default)
#             [ ] Expand variable reference (changed)
#    - Key `MAINT_CARGO_CRATE_OWNERS_UNTIL`
#    - Value, a multi-line string as described below
# This causes gitlab to set an environment variable MAINT_CARGO_CRATE_OWNERS_UNTIL
# to the name of a file, containing the multi-line string.
#
# The file contents are in this form:
#     v1 YYYY-MM-DD
#     crates.io-username
#     crates.io-username
#     ...
#
# The file (the value) may also be empty, in which case it will be ignored.
# (This is to allow you to preserve the variable properties,
# while removing the actual data.)
#
# `v1` is a fixed string, to allow future evolution of this protocol.
# Everything on a line after a # is a comment and is ignored.
#
# YYYY-MM-DD is an expiry date for this data.
# After this date, this script will start to unconditionally fail!
# You should set it to the time when you expect to be able to remove the exception,
# and then this script will help enforce that the exception is removed.
#
# Every following line should be a username.
# That user will be treated as if they are an owner of every crate,
# regardless of whether they really are.

# Stop at the first failing command, and treat a pipeline as failed
# if any of its stages fails.
set -eo pipefail

# Directory containing the maintenance helpers (a relative path).
maint=maint
# shellcheck source=maint/crates-io-utils.sh
. "$maint/crates-io-utils.sh"
# Names of the crates to check, as printed by the list_crates helper.
crates=$("$maint/list_crates")

# This script takes no command line arguments at all.
[ $# -eq 0 ] || fail 'bad usage: no arguments allowed'

# Reads the file named by MAINT_CARGO_CRATE_OWNERS_UNTIL (see spec above),
# and digests it into "$tmp"//override.json, which is a sequence of strings
# being the usernames to pretend every crate has as owner.
read_owners_override () {
    : >"$tmp"/override.json

    if [ "$MAINT_CARGO_CRATE_OWNERS_UNTIL" = '' ]; then return; fi
    # `read` gives status 1 on missing final newline, which would result in
    # us perhaps ignoring the last line if we just read it directly!
    # But, perl will happily read such a file, and we can then use it fix it up.
    # Also, we use perl to do comment filtering.
    perl -pe 's{\#.*}{}; s/\n*$/\n/' <"$MAINT_CARGO_CRATE_OWNERS_UNTIL" >"$tmp"/override-filtered
    exec 3<"$tmp"/override-filtered
    if ! read -r <&3 version override_expiry; then
	# We couldn't read even one line.  The file is empty: ignore it.
	return
    fi
    case "$version" in
	v1) ;;
	*) fail "MAINT_CARGO_CRATE_OWNERS_UNTIL: expected version v1, got $version" ;;
    esac
    case "$override_expiry" in
	[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]) ;;
	*) fail "MAINT_CARGO_CRATE_OWNERS_UNTIL: bad expiry" ;;
    esac
    expiry_tt=$(date -d "$override_expiry" +%s)
    current_tt=$(date -u +%s)
    if [ "$current_tt" -gt "$expiry_tt" ]; then
	fail "MAINT_CARGO_CRATE_OWNERS_UNTIL: override data expired! (expiry date $override_expiry)"
    fi
    while read -r <&3 user; do
	if [ "$user" = '' ]; then continue; fi
	printf "%s" "$user" | jq --slurp -R . >>"$tmp"//override.json
    done
}

tmp_trap_exit_setup

read_owners_override

# Number of crates whose owner set was really altered by the override data.
override_changed=0

# For every crate: fetch its owners, add the pretended owners, and file the
# crate under the hash of the resulting (canonicalised) owner list, so that
# crates with identical owners end up in the same "byhash" group.
for p in $crates; do
    printf "checking owners of %-40s " "$p"
    # Saves the API response in "$tmp/p,$p.json"; the HTTP status is then
    # looked up in $http_code (presumably set by crates_io_api_call).
    crates_io_api_call "v1/crates/$p/owners" .users "$tmp/p,$p.json"

    case "$http_code" in
        404)
            # Never published: not subject to the ownership check.
            echo "unpublished"
            continue
            ;;
        200)
            ;;
        *)
            fail 'internal error'
            ;;
    esac

    # The real owners, canonicalised: `unique` sorts and deduplicates.
    # (jq's -S only sorts object keys, so it cannot be used to order logins.)
    jq '[.users[].login] | unique | .[]' \
        <"$tmp/p,$p.json" >"$tmp/owners-1,$p.json"

    # Real plus pretended owners, canonicalised in exactly the same way, so
    # the two files differ if and only if the override added somebody new.
    # Truncate rather than append, so stale content can never accumulate.
    cat "$tmp/owners-1,$p.json" "$tmp"/override.json |
        jq -s 'unique | .[]' >"$tmp/owners,$p.json"

    # cmp: 0 = identical, 1 = different, anything else = trouble.
    rc=0
    cmp -s "$tmp/owners-1,$p.json" "$tmp/owners,$p.json" || rc=$?
    case "$rc" in
        0) ;;
        1) override_changed=$(( override_changed + 1 )) ;;
        *) fail "cmp failed" ;;
    esac

    # Group crates by the hash of their effective owner list.
    hash=$(sha256sum <"$tmp/owners,$p.json")
    hash=${hash%% *}
    cp "$tmp/owners,$p.json" "$tmp/byhash.$hash.owners"
    printf '%s\n' "$p" >>"$tmp/byhash.$hash.packages"

    # jq prints one "1" per owner, so the line count is the owner count.
    n_owners=$(jq <"$tmp/owners,$p.json" 1 | wc -l)
    n_packages=$(wc -l <"$tmp/byhash.$hash.packages")
    printf '%d owners (group size: %d)\n' "$n_owners" "$n_packages"
done

# Build "$tmp/list": one "<count> <packages-file>" line per distinct owner set,
# largest group first.  The grep drops the "total" line that wc adds when it
# is given more than one file.
wc -l "$tmp"/byhash.*.packages \
    | grep -v ' total$' \
    | sort -rn >"$tmp/list"

n_groups=$(wc -l <"$tmp/list")

# Exactly one group means every published crate has the same owners.
case "$n_groups" in
    1)
        printf '\n%s\n\n' 'all ownerships are identical:'
        status=0
        ;;
    *)
        printf '\n%s\n%s different sets of owners\n\n' \
            'ownerships of published crates vary!' "$n_groups"
        status=2
        ;;
esac

# The report is written to fd 4, which for now is simply a copy of stderr;
# this leaves a single place to change should it need redirecting later.
exec 4>&2

# Walk the group list (fd 3) and print each owner set with its crates.
exec 3<"$tmp/list"
# shellcheck disable=SC2162 # we don't need -r, it has no backslashes
while read group_size pkg_list <&3; do
    owner_list=${pkg_list%.packages}.owners
    # Recount from the file itself rather than trusting the listed figure.
    group_size=$(wc -l <"$pkg_list")
    {
        echo "$group_size package(s) have the following owner(s):"
        # cat -v makes any control characters in the owner names visible.
        sed 's/^/\t/' "$owner_list" | cat -v
        echo "  those are owner(s) of the following package(s):"
        sed 's/^/\t/' "$pkg_list"
        echo
    } >&4
done

# Mention it whenever override data was in effect for this run.
if [ -n "$override_expiry" ]; then
    printf '%s\n' \
        "NB, used MAINT_CARGO_CRATE_OWNERS_UNTIL, expiry $override_expiry:" \
        "Pretended owners added to $override_changed crates."
fi

# Hand the verdict (0 = identical owners, 2 = owners vary) to the exit helper.
tmp_trap_exit_finish_status $status
