#!/usr/bin/env perl 
# vim:sw=4

use strict;
use warnings;
use Getopt::Long;

# Command-line options

# Basename of this script, used in the usage text and in diagnostics.
my $progname = $0;
$progname =~ s!^.*/!!;

# Number of CPUs handed to Minimac3 through its --cpus option.
my $cpus = 1;

# Verbose diagnostics.  No command-line option sets this; it has to be
# toggled by editing the script.
my $debug = 0;

# GetOptions returns false when it meets an unknown option or a malformed
# value (e.g. a non-integer --cpus).  Show the usage text in that case
# instead of carrying on with a half-parsed command line.
my $help;
my $options_ok = GetOptions(
    "help"   => \$help,
    "cpus=i" => \$cpus,
);

if (!$options_ok || $help || @ARGV == 0) {
    print STDERR <<EOF;
SYNOPSIS

    $progname [--help] [--cpus=N] FILE.vcf.gz ...

DESCRIPTION

    Prepare genotype reference files for use with Eagle and Minimac3 pipeline.
    Specifically, for each argument FILE.vcf.gz, the following files are created
    in the same directory:
    
        * FILE.m3vcf.gz -- a Minimac3 version of the VCF file

    The tool will also work with VCF files that end just with .gz, not .vcf.gz,
    although this is discouraged.

    Note that the script processes the files sequentially, except the --cpus=N
    option that is passed to Minimac3. To achieve parallelism, split VCF files
    into chunks and distribute across multiple invocations of this script.
EOF
    exit 2;
}

# Reading the config file

#############################
# load utility functions
#############################

use FindBin;
use lib "$FindBin::Bin";
use Ricopili::Utils qw(trans);

# Refuse to start unless every argument names an existing regular file whose
# name really ends in ".gz".  The dot is escaped so that names such as
# "foo.tgz" or "fooXgz" are rejected, and \z keeps a trailing newline from
# slipping through.
foreach my $arg (@ARGV) {
    $arg =~ /\.gz\z/ && -f $arg
        or die "Argument $arg is not a .gz file; aborting";
}

# Processing

# Derived from https://metacpan.org/source/ROSCH/String-ShellQuote-1.04/ShellQuote.pm
# (Artistic License)
#
# shell_quote(STRING)
#
# Returns STRING quoted with sh-style single-quote rules so that it can be
# pasted into a shell command line as one word.
#
#   * Strings made up only of word characters and !%+,-./:=@^ are returned
#     unchanged.
#   * Anything else is wrapped in single quotes, with embedded single quotes
#     rewritten so they survive the quoting.
#   * The empty string is returned as '' so the argument does not vanish
#     from the command line.
#
# The function works on a private copy of its argument and leaves the
# caller's $_ untouched.
sub shell_quote($) {
    my ($str) = @_;

    # An empty argument must still show up as a (quoted) word.
    return q{''} if $str eq '';

    if ($str =~ m|[^\w!%+,\-./:=@^]|) {
        # ' -> '\''
        $str =~ s/'/'\\''/g;

        # make multiple ' in a row look simpler
        # '\'''\'''\'' -> '"'''"'
        # (each escaped quote is 4 characters long, hence length / 4)
        $str =~ s|((?:'\\''){2,})|q{'"} . (q{'} x (length($1) / 4)) . q{"'}|ge;

        $str = "'$str'";

        # Drop the empty '' pairs that appear when the string starts or
        # ends with an escaped quote.
        $str =~ s/^''//;
        $str =~ s/''$//;
    }
    return $str;
}

# Path of the Minimac3 executable.  trans("minimac3loc") is expected to
# return the installation directory -- TODO confirm against Ricopili::Utils.
my $minimac = trans("minimac3loc") . "/Minimac3-omp";

# Run Minimac3 once per input file, then drop a ".fini" marker next to it.
foreach my $file (@ARGV) {
    # Output prefix: the input name without its (.vcf).gz suffix.
    my $prefix = $file;
    $prefix =~ s/(\.vcf)?\.gz$//;

    # Minimac3 is pointed at the sibling file with a .bgz suffix, not at
    # the .gz argument itself.
    my $file_bgz = $file;
    $file_bgz =~ s/\.gz$/.bgz/;

    print STDERR "$progname: processing $file_bgz with Minimac3 (prefix for output files: $prefix)\n"
        if $debug;

    # List-form system() bypasses the shell, so file names need no quoting.
    my $status = system($minimac,
        "--cpus", $cpus,
        "--refHaps", $file_bgz,
        "--processReference",
        "--prefix", $prefix,
        "--lowMemory");
    if ($status != 0) {
        # Decode $? so the message says why the run failed.
        my $why = $? == -1 ? "could not execute $minimac: $!"
                : $? & 127 ? "killed by signal " . ($? & 127)
                :            "exit status " . ($? >> 8);
        die "Minimac3 processing failed for $file_bgz ($why); aborting";
    }

    # Completion marker; fail loudly if it cannot be created, so a missing
    # marker is never mistaken for an unfinished run.
    my $marker = "$file.m3vcf.fini";
    system("touch", $marker) == 0
        or die "$progname: could not create marker file $marker; aborting";
}
