#!/usr/bin/env perl 
# vim:sw=4

use strict;
use warnings;
use Getopt::Long;

# Command-line options

# Basename of this script, used in the usage text and in diagnostics.
my $progname = $0;
$progname =~ s!^.*/!!;

# Default for --cpus.
# NOTE(review): $cpus is parsed here but not referenced anywhere else in the
# visible part of this script -- confirm whether it is still needed.
my $cpus = 1;

# GetOptions returns false when it meets an unknown or malformed option (it
# has already printed a warning by then).  Keep the result so that a bad
# command line ends in the usage message instead of being silently ignored.
my $options_ok = GetOptions(
    "help" => \my $help,
    "cpus=i" => \$cpus,
    "debug"=> \my $debug,
    );


# Print the usage text and exit with status 2 when help was requested, when
# the options could not be parsed, or when no input files were given.
if (!$options_ok || $help || @ARGV == 0){
    print STDERR <<EOF;
SYNOPSIS

    $progname [--help] [--cpus=N] FILE.vcf.gz ...

DESCRIPTION

    Prepare genotype reference files for use with Eagle and Minimac3 pipeline.
    Specifically, for each argument FILE.vcf.gz, the following files are created
    in the same directory:
    
        * FILE.vcf.bgz -- a block-gzipped VCF file (see bgzip(1))

    The tool will also work with VCF files that end just with .gz, not .vcf.gz,
    although this is discouraged.

    Note that the script processes the files sequentially, except the --cpus=N
    option that is passed to Minimac3. To achieve parallelism, split VCF files
    into chunks and distribute across multiple invocations of this script.

    --debug             extended output
    
EOF
    exit 2;
}

# Reading the config file

#############################
# load utility functions
#############################

use FindBin;
use lib "$FindBin::Bin";
use Ricopili::Utils qw(trans);


# Validate every argument before any work starts, so that nothing is
# recompressed when one of the inputs is unusable.  Each argument must be
# named *.gz and must be an existing plain file.
foreach my $arg (@ARGV) {
    # The dot is escaped and the match is anchored with \z, so names such as
    # "foogz" (no real extension) or "foo.gz\n" are rejected.
    unless ($arg =~ /\.gz\z/ && -f $arg) {
        die "Argument $arg is not a .gz file; aborting";
    }
}

# Processing

# Derived from https://metacpan.org/source/ROSCH/String-ShellQuote-1.04/ShellQuote.pm
# (Artistic License)
# shell_quote(STRING)
#
# Return STRING quoted so that a POSIX shell reads it back as one single word.
#
#   * Strings made up only of word characters and !%+,-./:=@^ are returned
#     unchanged.
#   * Anything else is wrapped in single quotes, with embedded single quotes
#     rewritten so that they survive the quoting.
#   * An undefined or empty STRING yields '' (two single quotes); otherwise
#     the argument would disappear from the command line altogether.
#
# The work is done on a lexical copy, so neither the caller's $_ nor the
# argument itself is modified.  The ($) prototype is kept so that existing
# call sites parse exactly as before.
sub shell_quote($) {
    my ($str) = @_;

    return "''" if !defined $str || $str eq '';

    if ($str =~ m|[^\w!%+,\-./:=@^]|) {
        # ' -> '\''
        $str =~ s/'/'\\''/g;

        # make multiple ' in a row look simpler
        # '\'''\'''\'' -> '"'''"'
        # (each escaped quote is 4 characters long, hence length / 4)
        $str =~ s|((?:'\\''){2,})|q{'"} . (q{'} x (length($1) / 4)) . q{"'}|ge;

        $str = "'$str'";

        # Drop the empty '' pairs left over when the string started or ended
        # with a quote.
        $str =~ s/^''//;
        $str =~ s/''$//;
    }
    return $str;
}

# Full path of the bgzip executable.  trans() is imported from
# Ricopili::Utils; presumably it looks up the configured install directory
# for the "bgziploc" key -- confirm against that module.
my $bgzip = trans("bgziploc") . "/bgzip";

# Recompress every input in turn: FILE.gz is decompressed with zcat and
# piped through bgzip into FILE.bgz next to the original.
foreach my $file (@ARGV) {
    # Output name: same path with the trailing .gz replaced by .bgz.
    (my $file_bgz = $file) =~ s/\.gz$/.bgz/;

    print STDERR "$progname: recompressing $file into $file_bgz\n" if $debug;

    # The command runs through the shell because of the pipe and the
    # redirection, so every interpolated path is shell-quoted.
    # NOTE(review): the shell reports only the exit status of the last
    # command in the pipeline (bgzip); a zcat failure on a corrupt input is
    # not detected here.
    my $command = join " ",
        "zcat", shell_quote($file),
        "|", shell_quote($bgzip),
        ">", shell_quote($file_bgz);
    system($command) == 0
        or die "bgzipping failed; aborting\n";

    # Drop a completion marker next to the input.
    # NOTE(review): the marker is named after the input (FILE.gz.bgz.fini),
    # not after the output file; kept as is because other tools may look for
    # exactly this name -- confirm.
    system("touch", "$file.bgz.fini") == 0
        or die "could not create marker $file.bgz.fini; aborting\n";
}
