#!/bin/bash

#######################################
# Print the usage message and terminate the script.
# Arguments: none
# Outputs:   the help text on stderr (nothing is written to stdout)
# Returns:   never returns; exits the shell with status 1
#######################################
function print_help_and_exit
{
cat >&2 << 'EOF'

'voronota-js-voromqa' script provides an interface to VoroMQA dark and light methods.

Options:
    --input | -i              string  *  input file path or '_list' to read file paths from stdin
    --restrict-input          string     query to restrict input atoms, default is '[]'
    --sequence-file           string     input sequence file
    --select-contacts         string     query to select contacts, default is '[-min-seq-sep 2]'
    --inter-chain                        flag to set query to select contacts to '[-inter-chain]'
    --constraints-file        string     input distance constraints file
    --output-table-file       string     output table file path, default is '_stdout' to print to stdout
    --output-dark-scores      string     output PDB file with dark scores as B-factors
    --output-light-scores     string     output PDB file with light scores as B-factors
    --output-alignment        string     output sequence alignment file
    --cache-dir               string     path to cache directory
    --tour-sort               string     tournament sorting mode, default is '_mono', options are '_homo', '_hetero' or custom
    --sbatch-parameters       string     parameters to run Slurm sbatch if input is '_list'
    --split-models-prefix     string     name prefix for splitting input PDB file by models
    --smoothing-window        number     residue scores smoothing window size, default is 0
    --processors              number     maximum number of processors to use, default is 1
    --casp-b-factors                     flag to write CASP-required B-factors in output PDB files
    --use-scwrl                          flag to use Scwrl4 to rebuild side-chains
    --guess-chain-names                  flag to guess chain names based on sequence numbering
    --order-by-residue-id                flag to order atoms by residue ids
    --input-is-script                    flag to treat input file as vs script
    --as-assembly                        flag to treat input file as biological assembly
    --help | -h                          flag to display help message and exit

Standard output:
    space-separated table of scores
    
Examples:

    voronota-js-voromqa --input model.pdb
    
    voronota-js-voromqa --cache-dir ./cache --input model.pdb
    
    ls *.pdb | voronota-js-voromqa --cache-dir ./cache --input _list
    
    ls *.pdb | voronota-js-voromqa --cache-dir ./cache --input _list | column -t
    
    ls *.pdb | voronota-js-voromqa --cache-dir ./cache --input _list \
      --processors 8 \
      --inter-chain \
      --tour-sort _hetero
      
    ls *.pdb | voronota-js-voromqa --cache-dir ./cache --input _list \
      --processors 8 \
      --select-contacts '[-a1 [-chain A -rnum 1:500] -a2 [-chain B -rnum 1:500]]' \
      --tour-sort '-columns full_dark_score sel_energy -multipliers 1 -1 -tolerances 0.02 0.0'

EOF
exit 1
}

#######################################
# Replace the first '-BASENAME-' placeholder in a template with the base name
# of a path.
# Arguments:
#   $1 - path whose base name is inserted
#   $2 - template string that may contain the '-BASENAME-' placeholder
# Outputs:   the resulting string on stdout, followed by a newline
#######################################
function substitute_id_in_filename
{
	local SUBSTITUTE_BASENAME
	local SUBSTITUTE_TEMPLATE="$2"
	
	SUBSTITUTE_BASENAME="$(basename -- "$1")"
	
	# The substitution is done with prefix/suffix stripping instead of sed, so
	# characters such as '|', '&' or '\' in the base name are inserted literally.
	case "$SUBSTITUTE_TEMPLATE" in
	*-BASENAME-*)
		# '%%' keeps the text before the first placeholder,
		# '#' keeps the text after the first placeholder.
		printf '%s\n' "${SUBSTITUTE_TEMPLATE%%-BASENAME-*}${SUBSTITUTE_BASENAME}${SUBSTITUTE_TEMPLATE#*-BASENAME-}"
		;;
	*)
		printf '%s\n' "$SUBSTITUTE_TEMPLATE"
		;;
	esac
}

# Remember how the script was invoked and with which arguments, so that the
# modes further below can re-run the script with adjusted options.
readonly ZEROARG=$0
ALLARGS=("$@")

# When the script was invoked through a path, prepend its directory to PATH.
# The directory is resolved inside a command substitution, so the working
# directory of the script itself is never changed, and nothing is added to
# PATH when the directory cannot be resolved.
if [[ $ZEROARG == *"/"* ]]
then
	ZEROARG_DIR="$(CDPATH= cd -- "$(dirname -- "$ZEROARG")" && pwd)" || ZEROARG_DIR=""
	if [ -n "$ZEROARG_DIR" ]
	then
		export PATH="${ZEROARG_DIR}:${PATH}"
	fi
	unset ZEROARG_DIR
fi

# Run everything below under the C locale.
LC_ALL=C
export LC_ALL

# Stop right away when the 'voronota-js' executable cannot be found.
if ! command -v voronota-js &> /dev/null
then
	echo >&2 "Error: 'voronota-js' executable not in binaries path"
	exit 1
fi

# Default values of all command line options.
INFILE=""
RESTRICT_INPUT="[]"
SEQUENCE_FILE=""
SELECT_CONTACTS="[-min-seq-sep 2]"
CONSTRAINTS_FILE=""
OUTPUT_TABLE_FILE="_stdout"
OUTPUT_DARK_SCORES=""
OUTPUT_LIGHT_SCORES=""
OUTPUT_ALIGNMENT=""
CACHE_DIR=""
TOUR_SORT_MODE="_mono"
SBATCH_PARAMETERS=""
SPLIT_MODELS_PREFIX=""
SMOOTHING_WINDOW="0"
MAX_PROCESSORS="1"
CASP_B_FACTORS="false"
USE_SCWRL="false"
GUESS_CHAIN_NAMES="false"
ORDER_BY_RESIDUE_ID="false"
INPUT_IS_SCRIPT="false"
AS_ASSEMBLY="false"
HELP_MODE="false"

#######################################
# Parse command line options into the global option variables set above.
# When an option is repeated, the last occurrence wins.
# Globals:   INFILE, RESTRICT_INPUT, SEQUENCE_FILE, SELECT_CONTACTS,
#            CONSTRAINTS_FILE, OUTPUT_TABLE_FILE, OUTPUT_DARK_SCORES,
#            OUTPUT_LIGHT_SCORES, OUTPUT_ALIGNMENT, CACHE_DIR, TOUR_SORT_MODE,
#            SBATCH_PARAMETERS, SPLIT_MODELS_PREFIX, SMOOTHING_WINDOW,
#            MAX_PROCESSORS, CASP_B_FACTORS, USE_SCWRL, GUESS_CHAIN_NAMES,
#            ORDER_BY_RESIDUE_ID, INPUT_IS_SCRIPT, AS_ASSEMBLY, HELP_MODE (written)
# Arguments: the command line arguments to parse
# Outputs:   an error message on stderr for an unknown option or for an
#            option that is missing its value
# Returns:   0 on success; exits the shell with status 1 on invalid input
#######################################
function parse_command_line_options
{
	local OPTION
	local VALUE_TARGET
	
	while (( $# > 0 ))
	do
		OPTION="$1"
		shift
		
		# Name of the variable that receives the next argument;
		# stays empty for flags that take no value.
		VALUE_TARGET=""
		
		case "$OPTION" in
		-i|--input)             VALUE_TARGET="INFILE" ;;
		--restrict-input)       VALUE_TARGET="RESTRICT_INPUT" ;;
		--sequence-file)        VALUE_TARGET="SEQUENCE_FILE" ;;
		--select-contacts)      VALUE_TARGET="SELECT_CONTACTS" ;;
		--constraints-file)     VALUE_TARGET="CONSTRAINTS_FILE" ;;
		--output-table-file)    VALUE_TARGET="OUTPUT_TABLE_FILE" ;;
		--output-dark-scores)   VALUE_TARGET="OUTPUT_DARK_SCORES" ;;
		--output-light-scores)  VALUE_TARGET="OUTPUT_LIGHT_SCORES" ;;
		--output-alignment)     VALUE_TARGET="OUTPUT_ALIGNMENT" ;;
		--cache-dir)            VALUE_TARGET="CACHE_DIR" ;;
		--tour-sort)            VALUE_TARGET="TOUR_SORT_MODE" ;;
		--sbatch-parameters)    VALUE_TARGET="SBATCH_PARAMETERS" ;;
		--split-models-prefix)  VALUE_TARGET="SPLIT_MODELS_PREFIX" ;;
		--smoothing-window)     VALUE_TARGET="SMOOTHING_WINDOW" ;;
		--processors)           VALUE_TARGET="MAX_PROCESSORS" ;;
		--inter-chain)          SELECT_CONTACTS="[-inter-chain]" ;;
		--casp-b-factors)       CASP_B_FACTORS="true" ;;
		--use-scwrl)            USE_SCWRL="true" ;;
		--guess-chain-names)    GUESS_CHAIN_NAMES="true" ;;
		--order-by-residue-id)  ORDER_BY_RESIDUE_ID="true" ;;
		--input-is-script)      INPUT_IS_SCRIPT="true" ;;
		--as-assembly)          AS_ASSEMBLY="true" ;;
		-h|--help)              HELP_MODE="true" ;;
		*)
			echo >&2 "Error: invalid command line option '$OPTION'"
			exit 1
			;;
		esac
		
		if [ -n "$VALUE_TARGET" ]
		then
			# Only the presence of a value is required: an explicitly passed
			# empty string (e.g. --split-models-prefix "") is a valid value.
			if (( $# < 1 ))
			then
				echo >&2 "Error: missing value for command line option '$OPTION'"
				exit 1
			fi
			printf -v "$VALUE_TARGET" '%s' "$1"
			shift
		fi
	done
}

parse_command_line_options "$@"

# Show the help text when no input was given or when help was requested.
if [[ -z "$INFILE" || "$HELP_MODE" == "true" ]]
then
	print_help_and_exit
fi

# Apart from the special names '_list' and '_stream',
# the input must be a file that passes the '-s' test.
case "$INFILE" in
_list|_stream)
	;;
*)
	if [[ ! -s "$INFILE" ]]
	then
		echo >&2 "Error: input file '$INFILE' does not exist"
		exit 1
	fi
	;;
esac

# Stream mode: save stdin to a temporary file and re-run the script on that file.
if [ "$INFILE" == "_stream" ]
then
	# Refuse to continue without a usable temporary directory, otherwise the
	# paths below would be rooted at '/'.
	if ! TMPLDIR="$(mktemp -d)" || [ ! -d "$TMPLDIR" ]
	then
		echo >&2 "Error: failed to create temporary directory"
		exit 1
	fi
	readonly TMPLDIR
	# Single quotes defer the expansion to exit time, where the path is quoted.
	trap 'rm -rf -- "$TMPLDIR"' EXIT
	
	cat > "$TMPLDIR/input_stream"
	
	if [ ! -s "$TMPLDIR/input_stream" ]
	then
		echo >&2 "Error: no stdin data"
		exit 1
	fi
	
	# The original arguments are passed first, the trailing '--input' comes
	# last and therefore replaces the '_stream' value when options are parsed.
	# NOTE(review): the exit status of the re-invoked script is not propagated.
	"$ZEROARG" "${ALLARGS[@]}" --input "$TMPLDIR/input_stream"
	
	exit 0
fi

# List mode: read input file paths from stdin, run the script once per path
# (through Slurm sbatch or through local parallel xargs), then merge the
# per-input tables and optionally rank them with tournament sorting.
if [ "$INFILE" == "_list" ]
then
	# Refuse to continue without a usable temporary directory, otherwise the
	# paths below would be rooted at '/'.
	if ! TMPLDIR="$(mktemp -d)" || [ ! -d "$TMPLDIR" ]
	then
		echo >&2 "Error: failed to create temporary directory"
		exit 1
	fi
	readonly TMPLDIR
	# Single quotes defer the expansion to exit time, where the path is quoted.
	trap 'rm -rf -- "$TMPLDIR"' EXIT
	
	# Keep the non-empty lines of stdin, sorted and without duplicates.
	grep . | sort -u > "$TMPLDIR/input_list"
	
	if [ ! -s "$TMPLDIR/input_list" ]
	then
		echo >&2 "Error: no stdin data"
		exit 1
	fi
	
	mkdir -p "$TMPLDIR/children_tables"
	
	# Each generated line holds the extra options of one child run; they are
	# appended after the original arguments and therefore override them.
	# NOTE(review): awk takes only the first whitespace-separated field and
	# xargs splits on whitespace, so paths containing blanks are not supported.
	if [ -n "$SBATCH_PARAMETERS" ]
	then
		mkdir -p "$TMPLDIR/slurm_logs"
		
		# SBATCH_PARAMETERS is left unquoted on purpose, so that it is split
		# into separate sbatch arguments.
		awk -v outdir="$TMPLDIR/children_tables" '{print "--input " $1 " --output-table-file " outdir "/" NR ".pdb"}' "$TMPLDIR/input_list" \
		| xargs -L 1 sbatch -o "$TMPLDIR/slurm_logs/slurmjob-%j.out" -e "$TMPLDIR/slurm_logs/slurmjob-%j.err" $SBATCH_PARAMETERS "$ZEROARG" "${ALLARGS[@]}" \
		| grep -E '^Submitted batch job ' \
		| awk '{print $4}' \
		> "$TMPLDIR/slurm_job_ids"
		
		# Wait until none of the submitted jobs is listed by squeue any more.
		# Only the job id column is printed and ids are compared as whole
		# fixed strings, so an id cannot match a part of another id or a
		# value in some other column.
		sleep 1
		while [ "$(squeue -h -o '%i' | awk '{print $1}' | grep -c -x -F -f "$TMPLDIR/slurm_job_ids")" -gt "0" ]
		do
			sleep 5
		done
		
		# Forward everything the jobs logged to stderr.
		find "$TMPLDIR/slurm_logs/" -type f -not -empty | xargs -L 1 cat >&2
	else
		awk -v outdir="$TMPLDIR/children_tables" '{print "--input " $1 " --output-table-file " outdir "/" NR ".pdb"}' "$TMPLDIR/input_list" \
		| xargs -L 1 -P "$MAX_PROCESSORS" "$ZEROARG" "${ALLARGS[@]}"
	fi
	
	# Concatenate the non-empty child tables, keeping line 1 and every even
	# line. NOTE(review): presumably every child table is a header line plus
	# one row, so this keeps a single header and all rows - confirm.
	find "$TMPLDIR/children_tables" -type f -not -empty \
	| sort \
	| xargs -L 1 cat \
	| awk '{if(NR==1 || NR%2==0) print $0}' \
	> "$TMPLDIR/table"
	
	if [ "$TOUR_SORT_MODE" != "_none" ]
	then
		# Expand the predefined mode names; any other value is passed to
		# the sorting command unchanged.
		case "$TOUR_SORT_MODE" in
		_mono)
			TOUR_SORT_MODE="-columns full_dark_score"
			;;
		_homo)
			TOUR_SORT_MODE="-columns full_dark_score sel_energy -multipliers 1 -1 -tolerances 0.01 0.0"
			;;
		_hetero)
			TOUR_SORT_MODE="-columns full_dark_score sel_energy -multipliers 1 -1 -tolerances 0.02 0.0"
			;;
		esac
		
		voronota-js "js:voronota_tournament_sort('-input-file _stdin -output-file _stdout -add-win-score-column tour_score ${TOUR_SORT_MODE}');" \
		< "$TMPLDIR/table"
	else
		cat "$TMPLDIR/table"
	fi
	
	exit 0
fi

# Split mode: split the input PDB file by models into temporary files and
# process those files by re-running the script in list mode.
if [ -n "$SPLIT_MODELS_PREFIX" ]
then
	# Refuse to continue without a usable temporary directory, otherwise the
	# paths below would be rooted at '/'.
	if ! TMPLDIR="$(mktemp -d)" || [ ! -d "$TMPLDIR" ]
	then
		echo >&2 "Error: failed to create temporary directory"
		exit 1
	fi
	readonly TMPLDIR
	# Single quotes defer the expansion to exit time, where the path is quoted.
	trap 'rm -rf -- "$TMPLDIR"' EXIT
	
	mkdir -p "$TMPLDIR/splits"

	# The output of voronota-js is fed to the list mode as its input list
	# (presumably the paths of the files created by the split command).
	# The trailing options override the original '--input' and reset the
	# split prefix to an empty string, so the child does not split again.
	{
		echo "voronota_split_pdb_file('-input-file', '$INFILE', '-output-file', '_stdout', '-prefix', '$TMPLDIR/splits/$SPLIT_MODELS_PREFIX', '-postfix', '.pdb');"
		echo "voronota_assert_full_success('Failed to split pdb file');"
	} \
	| voronota-js \
	| "$ZEROARG" "${ALLARGS[@]}" --input _list --split-models-prefix ""
	
	exit 0
fi

# Pass every path option that may be a template through
# substitute_id_in_filename together with the input file path.
CACHE_DIR="$(substitute_id_in_filename "$INFILE" "$CACHE_DIR")"
OUTPUT_TABLE_FILE="$(substitute_id_in_filename "$INFILE" "$OUTPUT_TABLE_FILE")"
OUTPUT_DARK_SCORES="$(substitute_id_in_filename "$INFILE" "$OUTPUT_DARK_SCORES")"
OUTPUT_LIGHT_SCORES="$(substitute_id_in_filename "$INFILE" "$OUTPUT_LIGHT_SCORES")"
SEQUENCE_FILE="$(substitute_id_in_filename "$INFILE" "$SEQUENCE_FILE")"
OUTPUT_ALIGNMENT="$(substitute_id_in_filename "$INFILE" "$OUTPUT_ALIGNMENT")"

# Quoted, so that an input path containing blanks or glob characters
# still yields a single base name.
INFILE_BASENAME="$(basename -- "$INFILE")"

# Generate the JavaScript program for a single input and pipe it into voronota-js.
# The program is assembled from four parts:
#   1. parameter assignments filled in from the shell variables,
#   2. fixed code that applies defaults, loads the input and prepares the atoms,
#   3. optional distance constraint checks, one per line of the constraints file,
#   4. fixed code that computes the scores and writes the requested outputs.
{
# Part 1: unquoted here-document, shell variables are expanded into the script.
# NOTE(review): values are inserted into single-quoted JavaScript strings
# without escaping, so a value containing a single quote breaks the script.
cat << EOF

params={}

params.input_file='$INFILE';
params.input_file_name='$INFILE_BASENAME';
params.cache_dir='$CACHE_DIR';
params.restrict_input_atoms='$RESTRICT_INPUT';
params.contacts_selection='$SELECT_CONTACTS';
params.input_is_script='$INPUT_IS_SCRIPT';
params.input_as_assembly='$AS_ASSEMBLY';
params.output_table_file='$OUTPUT_TABLE_FILE';
params.output_dark_scores='$OUTPUT_DARK_SCORES';
params.output_light_scores='$OUTPUT_LIGHT_SCORES';
params.smoothing_window='$SMOOTHING_WINDOW';
params.sequence_file='$SEQUENCE_FILE';
params.output_alignment='$OUTPUT_ALIGNMENT';
params.guess_chain_names='$GUESS_CHAIN_NAMES';
params.order_by_residue_id='$ORDER_BY_RESIDUE_ID';
params.use_scwrl='$USE_SCWRL';
params.casp_b_factors='$CASP_B_FACTORS';

EOF

# Part 2: quoted here-document, emitted literally.
cat << 'EOF'

if(params.input_file===undefined || params.input_file==="")
{
	throw ("No input file");
}

if(params.cache_dir===undefined)
{
	params.cache_dir='';
}

if(params.restrict_input_atoms===undefined || params.restrict_input_atoms==="")
{
	params.restrict_input_atoms='[]';
}

if(params.contacts_selection===undefined || params.contacts_selection==="")
{
	params.contacts_selection='[-min-seq-sep 2]';
}

if(params.input_is_script===undefined || params.input_is_script==="")
{
	params.input_is_script="false";
}

if(params.input_as_assembly===undefined || params.input_as_assembly==="")
{
	params.input_as_assembly="false";
}

if(params.smoothing_window===undefined || params.smoothing_window==="")
{
	params.smoothing_window=0;
}

if(params.output_table_file===undefined || params.output_table_file==="")
{
	params.output_table_file="_stdout";
}

if(params.output_dark_scores===undefined)
{
	params.output_dark_scores="";
}

if(params.output_light_scores===undefined)
{
	params.output_light_scores="";
}

if(params.input_is_script=="true")
{
	voronota_source("-file", params.input_file);
	voronota_assert_partial_success("Failed when running provided input script");
	
	voronota_list_objects();
	voronota_assert_full_success("Failed to list objects");
	var input_name_after_script=voronota_last_output().results[0].output.objects[0].name;
	
	voronota_rename_object("-original", input_name_after_script, "-new", params.input_file_name);
	voronota_assert_full_success("Failed to rename object");
}
else
{
	voronota_import("-file", params.input_file, "-as-assembly", params.input_as_assembly);
	voronota_assert_partial_success("Failed to import file");
}

voronota_restrict_atoms("-use", "[-protein]");
voronota_assert_full_success("Failed to restrict input atoms to protein only");

if(params.guess_chain_names==="true")
{
	voronota_set_chain_names_by_guessing();
	voronota_assert_full_success("Failed to set chain names by guessing");
}

if(params.order_by_residue_id==="true")
{
	voronota_sort_atoms_by_residue_id();
	voronota_assert_full_success("Failed to order atoms by residue ids");
}

if(params.use_scwrl==="true")
{
	voronota_scwrl();
	voronota_assert_full_success("Failed to use Scwrl4");
}

voronota_restrict_atoms("-use", params.restrict_input_atoms);
voronota_assert_full_success("Failed to restrict input atoms");

if(params.sequence_file!=="")
{
	if(params.output_alignment!=="")
	{
		voronota_set_adjunct_of_atoms_by_sequence_alignment("-name", "refseq", "-sequence-file", params.sequence_file, "-alignment-file", params.output_alignment);
	}
	else
	{
		voronota_set_adjunct_of_atoms_by_sequence_alignment("-name", "refseq", "-sequence-file", params.sequence_file);
	}
	voronota_assert_full_success("Failed to set residue sequence number adjunct");
	
	voronota_restrict_atoms_and_renumber_residues_by_adjunct("-name", "refseq");
	voronota_assert_full_success("Failed to renumber residues by adjunct");
}

EOF

# Part 3: one constraint check per non-empty line of the constraints file.
# A constraints file that is missing or empty is skipped without a message.
if [ -n "$CONSTRAINTS_FILE" ] && [ -s "$CONSTRAINTS_FILE" ]
then
	# 'read -r' keeps backslashes literal, and the additional '-n' test makes
	# sure that a last line without a trailing newline is still processed.
	while read -r CONSTRAINT_LINE || [ -n "$CONSTRAINT_LINE" ]
	do
		if [ -n "$CONSTRAINT_LINE" ]
		then
			printf "voronota_check_distance_constraint('%s');\n" "$CONSTRAINT_LINE"
			printf "voronota_assert_full_success('Failed distance constraint');\n"
		fi
	done < "$CONSTRAINTS_FILE"
fi

# Part 4: quoted here-document, emitted literally.
cat << 'EOF'

voronota_construct_or_load_quality_scores("-cache-dir", params.cache_dir);
voronota_assert_full_success("Failed to compute or load quality scores");

voronota_select_contacts("-use", params.contacts_selection, "-name", "relevant_contacts")
voronota_assert_full_success("Failed to select requested contacts");

voronota_voromqa_local("-global-adj-prefix", "voromqa_light_full");
voronota_assert_full_success("Failed to compute quality scores");

voronota_voromqa_local("-contacts", "[relevant_contacts]", "-global-adj-prefix", "voromqa_light_selected");
voronota_assert_full_success("Failed to compute quality scores");

voronota_voromqa_dark_local("-global-adj-prefix", "voromqa_dark_full");
voronota_assert_full_success("Failed to compute quality scores");

voronota_voromqa_dark_local("-atoms", "[-sel-of-contacts relevant_contacts]", "-global-adj-prefix", "voromqa_dark_selected");
voronota_assert_full_success("Failed to compute quality scores");

voronota_clash_score("-use", "[relevant_contacts]", "-global-adj-prefix", "clash");
voronota_assert_full_success("Failed to compute clash score");

voronota_summarize_linear_structure("-global-adj-prefix", "linear_summary");
voronota_assert_full_success("Failed to summarize linear structure");

voronota_rename_global_adjunct("voromqa_dark_full_quality_score", "full_dark_score");
voronota_rename_global_adjunct("voromqa_light_full_atoms_quality_score", "full_light_score");
voronota_rename_global_adjunct("voromqa_dark_full_residues_count", "full_residues");
voronota_rename_global_adjunct("voromqa_light_full_atoms_count", "full_atoms");
voronota_rename_global_adjunct("voromqa_dark_selected_quality_score", "sel_dark_score");
voronota_rename_global_adjunct("voromqa_light_selected_atoms_quality_score", "sel_light_score");
voronota_rename_global_adjunct("voromqa_light_selected_contacts_pseudo_energy", "sel_energy");
voronota_rename_global_adjunct("voromqa_light_selected_contacts_pseudo_energy_norm", "sel_energy_norm");
voronota_rename_global_adjunct("voromqa_light_selected_residues_count", "sel_residues");
voronota_rename_global_adjunct("voromqa_light_selected_atoms_count", "sel_atoms");
voronota_rename_global_adjunct("voromqa_light_selected_contacts_count", "sel_contacts");
voronota_rename_global_adjunct("voromqa_light_selected_contacts_area", "sel_contacts_area");
voronota_rename_global_adjunct("clash_score", "sel_clash_score");
voronota_rename_global_adjunct("linear_summary_ss_alpha", "ss_alpha");
voronota_rename_global_adjunct("linear_summary_ss_beta", "ss_beta");

if(params.output_table_file!=="_stdout")
{
	shell('mkdir -p "$(dirname '+params.output_table_file+')"');
}

voronota_export_global_adjuncts("-file", params.output_table_file, "-adjuncts",
  ["full_dark_score", "full_light_score", "full_residues", "full_atoms",
   "sel_dark_score", "sel_light_score", "sel_energy", "sel_energy_norm", "sel_residues", "sel_atoms",
   "sel_contacts", "sel_contacts_area", "sel_clash_score", "ss_alpha", "ss_beta"]);
voronota_assert_full_success("Failed to export scores");

if(params.output_dark_scores!=="")
{
	if(params.output_dark_scores!=="_stdout")
	{
		shell('mkdir -p "$(dirname '+params.output_dark_scores+')"');
	}
	
	voronota_set_adjunct_of_atoms_by_residue_pooling("-source-name", "vd1", "-destination-name", "vd1s", "-pooling-mode min", "-smoothing-window", params.smoothing_window);
	voronota_assert_full_success("Failed to pool and smooth residue adjuncts");
	
	if(params.casp_b_factors==="true")
	{
		voronota_set_adjunct_of_atoms("-name oc -value 1.0");
		voronota_set_adjunct_of_atoms_by_expression("-expression _reverse_s -input-adjuncts vd1s -parameters 0.5 0.1 0.5 0.2 5.0 -output-adjunct vd1sd");
		voronota_assert_full_success("Failed to transform adjuncts");
		
		voronota_export_atoms("-file", params.output_dark_scores, "-as-pdb", "-pdb-ter", "-pdb-b-factor", "vd1sd");
		voronota_assert_full_success("Failed to export pdb file");
	}
	else
	{
		voronota_export_atoms("-file", params.output_dark_scores, "-as-pdb", "-pdb-ter", "-pdb-b-factor", "vd1s");
		voronota_assert_full_success("Failed to export pdb file");
	}
}

if(params.output_light_scores!=="")
{
	if(params.output_light_scores!=="_stdout")
	{
		shell('mkdir -p "$(dirname '+params.output_light_scores+')"');
	}
	
	voronota_set_adjunct_of_atoms_by_residue_pooling("-source-name", "voromqa_score_r", "-destination-name", "voromqa_score_rs", "-pooling-mode min", "-smoothing-window", params.smoothing_window);
	voronota_assert_full_success("Failed to pool and smooth residue adjuncts");
	
	if(params.casp_b_factors==="true")
	{
		voronota_set_adjunct_of_atoms("-name oc -value 1.0");
		voronota_set_adjunct_of_atoms_by_expression("-expression _reverse_s -input-adjuncts voromqa_score_rs -parameters 0.3 0.1 0.5 0.2 3.0 -output-adjunct voromqa_score_rsd");
		voronota_assert_full_success("Failed to transform adjuncts");
		
		voronota_export_atoms("-file", params.output_light_scores, "-as-pdb", "-pdb-ter", "-pdb-b-factor", "voromqa_score_rsd");
		voronota_assert_full_success("Failed to export pdb file");
	}
	else
	{
		voronota_export_atoms("-file", params.output_light_scores, "-as-pdb", "-pdb-ter", "-pdb-b-factor", "voromqa_score_rs");
		voronota_assert_full_success("Failed to export pdb file");
	}
}
EOF

} \
| voronota-js




