#!/usr/bin/perl
use strict;

my $version = "1.3.0";
my $progname = $0;
$progname =~ s!^.*/!!;

use Compress::Zlib ;

#############################
# read config file
#############################

my $conf_file = $ENV{HOME}."/ricopili.conf";
my %conf = ();

die $!."($conf_file)" unless open FILE, "< $conf_file";
while (my $line = <FILE>){
    my @cells = split /\s+/, $line;
    $conf{$cells[0]} = $cells[1];
}
close FILE;

sub trans {
    my ($expr)=@_;
    unless (exists $conf{$expr}) {
	die "config file without entry: $expr\n";
    }
    $conf{$expr};
}

my $ploc = &trans("ploc");


###############################################



##### help message
my $usage = "
Usage : $progname haps-file (out of impute2)

version: $version

  --outname STRING    outdir, mandatory
  --outdir STRING     outname, mandatory
  --fam STRING        fam-file, mandatory
  --bim STRING        bim-file, mandatory
  --chr INT           chromosome
  --help              print this message and exit

  --prefix STRING     prefix into famfile


 created by Stephan Ripke 2012 at MGH, Boston, MA
 in the frame of the PGC

";

my $prefix = "";

use Getopt::Long;
GetOptions( 

    "help"=> \my $help,
    "outname=s"=> \my $outname,
    "prefix=s"=> \my $prefix,
    "outdir=s"=> \my $outdir,
    "fam=s"=> \my $famname,
    "bim=s"=> \my $bimname,
    "chr=i"=> \my $chr,

    );

die ($usage) if $help;
die ($usage) unless $famname;
die ($usage) unless $bimname;
die ($usage) unless $outname;
die ($usage) unless $outdir;
die ($usage) unless $chr;


# die "$usage" if (@ARGV != 1);


##########################################
# subroutine to split a plink-output-line with references
##########################################

sub split_line_ref {
    my ($line)=${$_[0]};
    chomp($line);
    $line =~ s/^[\s]+//g;
    my @cols=  split /\s+/, $line;
    \@cols;
}




###################################################
###  system call with test if successfull
###################################################

my @cmd_collect;

sub mysystem(){
    my ($systemstr)="@_";
    system($systemstr);
    my $status = ($? >> 8);
    die "$systemstr\n->system call failed: $status" if ($status != 0);
    push @cmd_collect, $systemstr;

}





############################################
######### BEGIN
########################################

my @haps_collection = @ARGV;
my $nf = @ARGV - 1;

my $haps_file1 = @haps_collection[0];
my $haps_file = "$haps_file1.combined";
my $haps_file_gz = "$haps_file1.combined.gz";



my @filehandles_gz;

################## create one thousand subdirs and thousand filehandles
foreach my $infile (@haps_collection) {

    my $igz = gzopen("$infile", "rb")  or die "Cannot open file $infile: $gzerrno\n" ;
    push(@filehandles_gz, $igz);

}



my $ogz = gzopen("$haps_file_gz", "wb")  or die "Cannot open file $haps_file_gz: $gzerrno\n" ;


#$ogz->gzwrite($header) ;

my $igz = $filehandles_gz[0];


while ($igz->gzreadline(my $line)){
    chomp($line);
    my @cells = @{&split_line_ref(\$line)};

    if ($nf > 0) {
	foreach my $igzn (1..$nf) {
	    my $igz_add = $filehandles_gz[$igzn];
	    $igz_add->gzreadline(my $line);
	    chomp($line);
	    my @cells_add = @{&split_line_ref(\$line)};


	    foreach (1..5) {
		shift (@cells_add);
	    }

	    @cells = (@cells,@cells_add);
	    
	}
    }
    $ogz->gzwrite("@cells\n");

}

foreach my $igzn (1..$nf-1) {
    my $igz_add = $filehandles_gz[$igzn];
    $igz->gzclose();
}
$ogz->gzclose();

#exit;



##########
# bring multiple files into one
########


my $info_file = $haps_file1;
$info_file =~ s/.gz$//;
$info_file .= "_info";


#print "finished big guy\n";
#exit;






#my $haps_file = $ARGV[0];

my $dosout = "$outdir/$outname";
my $mapout = "$outdir/$outname.out.dosage.map";
my $ngtout = "$outdir/$outname.out.dosage.ngt";
my $famout = "$outdir/$outname.out.dosage.fam";
my $finiout = "$outdir/$outname.out.dosage.fini";

my $famname_in = $famname;


if ($prefix) {
    die $!."($famname)" unless open IF, "< $famname";
    die $!."($famname.pre)" unless open OF, "> $famname.pre";

    while (my $line = <IF>){
	chomp($line);

	print OF "$prefix*$line\n";
    }
    close IF;
    close OF;
    $famname_in = "$famname.pre";
    &mysystem ("cp $famname.pre $famout");
}
else {
    &mysystem ("cp $famname $famout");
}



die $!."($famname_in)" unless open IF, "< $famname_in";
die $!."($famname_in.ow)" unless open OF, "> $famname_in.ow";

while (my $line = <IF>){
    my @cells = @{&split_line_ref(\$line)};
    if ($cells[5] != 2 && $cells[5] != 1) {
	$cells[5] = 1;
    }
    print OF "@cells\n";
}
close IF;
close OF;


# preprocess dosages to ensure probabilities sum to 1
my $twodos_tmp = "$haps_file1.combined.tmp2dos.gz";
my $twodos_tmp_success = "$twodos_tmp.fini";
my $imp_proc = "impprob_to_2dos $haps_file_gz $twodos_tmp";
print "$imp_proc\n";
unless (-e $twodos_tmp_success) {
    &mysystem ($imp_proc);
}

# check success
# if successful, remove $haps_file_gz now to save space
unless (-e $twodos_tmp_success) {
    die "Failed to create $twodos_tmp from $haps_file_gz";
}


#my $sys_loc = "$ploc/plink --dosage $haps_file_gz noheader skip0=1 skip1=1 format=3 Z --fam $famname_in.ow --allow-no-sex --write-dosage --out $dosout";
my $sys_loc = "$ploc/plink --dosage $twodos_tmp noheader skip0=1 skip1=1 format=2 Z --fam $famname_in.ow --allow-no-sex --write-dosage --out $dosout";
print "$sys_loc\n";
&mysystem ($sys_loc);

#exit;



my %ngt;
my $cc = 0;
die $!."($bimname)" unless open FILE, "< $bimname";
while (my $line = <FILE>){
    my @cells = @{&split_line_ref(\$line)};
    $ngt{$cells[1]} = 1;
    $cc++;
}
close FILE;
#print "N:$cc\n";
#print "$info_file\n";
#sleep(3);

if ($info_file) {
    die $!."($info_file)" unless open IF, "< $info_file";
    die $!."($mapout)" unless open MA, "> $mapout";
    die $!."($ngtout)" unless open NGT, "> $ngtout";
    my $line = <IF>;
    while (my $line = <IF>){
	my @cells = @{&split_line_ref(\$line)};

	my $snp = $cells[1];
	my $pos = $cells[2];


	my $bas_str = "$chr $snp 0 $pos";
	print MA "$bas_str\n";
	my $ngt_loc = 0;
	if (exists $ngt{$snp}) {
	    $ngt_loc = 1;
	}
	print NGT "$bas_str $ngt_loc\n";
    }
    close IF;
    close MA;
    close NGT;
}


#&mysystem ("gzip -c $dosout.out.dosage > $dosout.out.dosage.tmp.gz");
#&mysystem ("rm $dosout.out.dosage");
#&mysystem ("mv $dosout.out.dosage.tmp.gz $dosout.out.dosage.gz");

#print "no remove\n";
#exit;


die $!."$dosout.dos.cmd" unless open BC, "> $dosout.dos.cmd";
foreach (@cmd_collect) {
    print BC "$_\n";
}
close BC;



&mysystem ("touch $finiout");
&mysystem ("rm $haps_file_gz");

print "done\n";
