#!/usr/bin/perl
use strict;
  

########################################
#
#  version 24: 
#  
#  chunk results and then meta-chunk
#  area_plot_speed
#  new manhattan plot
#  new areator
#  chrX
#  new gwascatalog
#  exclude refind: cat pi_sub/prephase_job_list_multi1 | awk '{print $3}' | sed 's/.*chr/chr/' | sed 's/.bed//' > refind.ex
#
#  version 32:
#   checkflip with fth
#
#  version 46:
#   total cleanup, --phase cleaned, 
#
#  version 47:
#   reference line-up per chromosome (readref) for improved efficacy, some improvement in variance translation
#   refdir directly
#   meta-file for options
#
#
#
#  version 49:
#   maxpar for high IO jobs (changed blueprint as well)
#
#  version 50:
#   zipped impute2-output (-o_gz)
#   du_out at end
#
# version 51:
#   compatibility with LISA
#
# version 52:
#   comp_dir for best guess
#
# version 53:
#   replaced cobg_chunks with cobg_gw, changed pcaer as well.
#
# version 54:
#   fixed bad error checking for dosager and dabg
#   fixed lost individuals with missing phenotype
#
# version 55:
#   serial
#
# version 56
#   probability check (patch from raymond)
#   UGER at broad
#
#######################################




### working on 35, stopped at after chucker........
### get the stuff at the end of onlymeta

### look for here working (2 spots)

## 37 with checkpos3 (also based on position)
## 38 with checkpos5 (PsychChip snp names)
## 39 with cleanup (not finished yet)
## 40 with buildguessing and liftover, also testing for binaries
## 41 error message and success message, cleaning finished, 
## 42 with my.imp2.2 (including best guess haplotypes) 
##     change bcomb_3 into bcomp_3_p2
## 43 and 44: bug fixes
## 45 increase mem
## 46 cleanup, week jobs
## 47 read out reference per week.





my $version = "1.0.24";
my $progname = $0;
$progname =~ s!^.*/!!;
my $command_line = "$progname @ARGV";



my $jnum = 7; ### number of imputation job per node

my $spliha_n = 1500; ## split haplotypes with N individuals

my $best_lahunt = 5;

my $multithread1 = 4;
my $multithread2 = 8;


my $phas = -1;




my $info_txt = "";
#my $homedir = "/home/gwas";
my $rootdir = "";

my $iname = "" ;


my $suminfo = "infosum_pos";
my $suminfo_n = "$suminfo.nsnps";
my $suminfo_r = "$suminfo.reffiles";
#my $suminfo_s = "$suminfo.sorted";
my $suminfo_s = "NAN";

my $job_bn_th = 1000;

my @ref_coll = ();



#my $hapmap_ref_root = "/home/gwas/pgc-samples/hapmap_ref/";


my $fth_th = 0.15;


use Sys::Hostname;
my $host = hostname;

#my $broad = 1 if ($ENV{"DOMAINNAME"} =~ /broadinstitute/);


#############################
# read config file
#############################

my $conf_file = $ENV{HOME}."/ricopili.conf";
my %conf = ();

die $!."($conf_file)" unless open FILE, "< $conf_file";
while (my $line = <FILE>){
    my @cells = split /\s+/, $line;
    $conf{$cells[0]} = $cells[1];
}
close FILE;

sub trans {
    my ($expr)=@_;
    unless (exists $conf{$expr}) {
	die "config file without entry: $expr\n";
    }
    $conf{$expr};
}

my $ploc = &trans("p2loc");
my $shloc = &trans("shloc"); # shapeit
my $hapmap_ref_root = &trans("hmloc");
my $homedir = &trans("home");
my $qloc = &trans("queue");
my $i2loc = &trans("i2loc");
my $liloc = &trans("liloc");
my $email = &trans("email");
my $loloc = &trans("loloc");


###############################################

#if ($broad) {
#    $hapmap_ref_root = "/home/radon01/sripke/bakker_ripke/hapmap_ref/";
#    $homedir = "/home/radon01/sripke/";
#}


$ref_coll[5] = "$hapmap_ref_root"."1KG/phased/subchr" ;
$ref_coll[6] = "$hapmap_ref_root"."1KG_june10/hapmap3_r2_plus_1000g_jun2010_b36_ceu/bgl/subchr_5" ;
$ref_coll[7] = "$hapmap_ref_root"."1KG_aug10/subchr" ;
$ref_coll[8] = "$hapmap_ref_root"."1KG_aug10_nodup/subchr" ;
$ref_coll[8883] = "$hapmap_ref_root"."1KG_aug10_nodup/mhc_window" ;
$ref_coll[8882] = "$hapmap_ref_root"."mars_window/1KG" ;

$ref_coll[88] = "$hapmap_ref_root"."1KG_phas1_umich/ref_0611/subchr" ;
$ref_coll[881] = "$hapmap_ref_root"."1KG_phas1_umich/ref_0611_eur/subchr" ;
$ref_coll[882] = "$hapmap_ref_root"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr" ;
$ref_coll[8821] = "$hapmap_ref_root"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr/test2" ;
$ref_coll[8822] = "$hapmap_ref_root"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr/NOD2" ;
$ref_coll[88222] = "$hapmap_ref_root"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr/best_basam" ;
$ref_coll[882222] = "$hapmap_ref_root"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr/chr3_itih" ;
$ref_coll[882223] = "$hapmap_ref_root"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr/chr19_ncan" ;

$ref_coll[9999] = "$hapmap_ref_root"."impute2_ref/1KG_Mar12/ALL_1000G_phase1integrated_feb2012_impute/test/subchr" ;
$ref_coll[9] = "$hapmap_ref_root"."impute2_ref/1KG_Mar12/ALL_1000G_phase1integrated_feb2012_impute/subchr" ;
$ref_coll[91] = "$hapmap_ref_root"."impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/subchr" ;
$ref_coll[9111] = "$hapmap_ref_root"."impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/subchr/test" ;
$ref_coll[9123] = "$hapmap_ref_root"."impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/chr23/subchr" ;
$ref_coll[91231] = "$hapmap_ref_root"."impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/chr23/subchr/test" ;


$ref_coll[923] = "$hapmap_ref_root"."impute2_ref/1KG_Mar12/ALL_1000G_phase1integrated_feb2012_impute/chr23/subchr" ;
$ref_coll[9231] = "$hapmap_ref_root"."impute2_ref/1KG_Mar12/ALL_1000G_phase1integrated_feb2012_impute/chr23_pseudo/subchr" ;


$ref_coll[555] = "$hapmap_ref_root"."1KG/phased/subchr/test";
$ref_coll[3] = "$hapmap_ref_root"."subchr";
$ref_coll[323] = "$hapmap_ref_root"."subchr/23";
$ref_coll[333] = "$hapmap_ref_root"."subchr/test" ;

$ref_coll[334] = "$hapmap_ref_root"."subchr/HLA" ;
$ref_coll[335] = "$hapmap_ref_root"."subchr/chr2_test" ;
$ref_coll[3333] = "$hapmap_ref_root"."subchr/test/local" ;
$ref_coll[3331] = "$hapmap_ref_root"."subchr/SDCCA" ;
$ref_coll[3332] = "$hapmap_ref_root"."mars_window" ;
$ref_coll[39] = "$hapmap_ref_root"."impute2_ref/HM3/hapmap3_r2_b36/subchr" ;
$ref_coll[399] = "$hapmap_ref_root"."impute2_ref/HM3/hapmap3_r2_b36/subchr.3mb" ;

$ref_coll[15] ="$hapmap_ref_root"."hla_t1d/subchr";
$ref_coll[152] ="$hapmap_ref_root"."impute2_ref/HLA_0813/orig/subchr";
$ref_coll[1521] ="$hapmap_ref_root"."impute2_ref/HLA_0813/orig/hg19";
$ref_coll[1522] ="$hapmap_ref_root"."mhc";
$ref_coll[515] = "$hapmap_ref_root"."1KG/phased/subchr/hla" ;
$ref_coll[511] = "$hapmap_ref_root"."1KG/phased/subchr/mir" ;
$ref_coll[512] = "$hapmap_ref_root"."1KG/phased/subchr/mir708" ;
$ref_coll[513] = "$hapmap_ref_root"."1KG/phased/subchr/HBII-108" ;

$ref_coll[514] = "$hapmap_ref_root"."1KG/phased/subchr/chr10_11" ;
$ref_coll[515] = "$hapmap_ref_root"."1KG/phased/subchr/chr8_9" ;
$ref_coll[516] = "$hapmap_ref_root"."1KG/phased/subchr/cacna1c" ;
$ref_coll[517] = "$hapmap_ref_root"."1KG/phased/subchr/csmd1" ;
$ref_coll[518] = "$hapmap_ref_root"."1KG/phased/subchr/tcf4" ;
$ref_coll[519] = "$hapmap_ref_root"."1KG/phased/subchr/chr2_20" ;
$ref_coll[520] = "$hapmap_ref_root"."1KG/phased/subchr/top_scz" ;
$ref_coll[521] = "$hapmap_ref_root"."1KG/phased/subchr/chr11_13" ;

$ref_coll[5555] ="$hapmap_ref_root"."impute2_ref/lboettger/chr16";

$ref_coll[4] = "$hapmap_ref_root"."CNV/subchr";
$ref_coll[444] = "$hapmap_ref_root"."CNV/subchr_test";

$ref_coll[2] = "$hapmap_ref_root"."phas2/subchr/outdir";
$ref_coll[222] = "$hapmap_ref_root"."phas2/subchr/outdir/chr12_1";
$ref_coll[311] = "$hapmap_ref_root"."hm3_ww/subchr";
$ref_coll[322] = "$hapmap_ref_root"."finref/fineur/refdan/subchr";


my $info_th = 0.1;
my $freq_th = 0.005;
my $bg_th = 0.8;

my $sjamem_incr = 0;
#exit;

my $sec_freq = .2;  ## secure freq distance around 50%






my $popname = "eur";

##### help message
my $usage = "
Usage : $progname [options] --phase PHASE --outname OUTNAME

version: $version


 --help            print this text and exits

 --phase INT       impute with HM - Phase INT as ref., no default; 
                       (mandatory if --refdir is not specified)

 --refdir STRING   full path of reference directory, overwrites --phase

 --outname STRING  identifier for imputation run (mandatory)



#### for trio datasets

 --triset STRING  for subset of trio datasets (can contain bimfiles)

 --spliha INT      split haplotypes with N individuals




##### alignment to reference:

  --popname STRING    important for freq-checks, either 
                           eur (default), 
                           asn (asian), 
                           amr (america), 
                           afr (africa), 
                           asw (african american)

  --sfh FLOAT         secure frequency around 50% (default: $sec_freq)
                                for checkflip (compare to reference),
                                only applied to AT/CG SNPs
  --fth FLOAT         frequency-diff to exclude SNPs, default $fth_th
                                for checkflip (compare to reference)



 
#### post - imputation cleaning

  --info_th FLOAT  threshold for infoscore, default = $info_th

  --freq_th FLOAT  threshold for frequence (cases and controls), default = $freq_th

  --bg_th FLOAT    threshold for frequence (cases and controls), default = $bg_th


### technical options

  --refiex            file containing refinds to exclude

  --sjamem_incr INT   increase all memore requests by INT Mb in steps of 1000 (1000 is 1Gb)

  --noclean           do not clean up intermediate files at the very end

  --force1            do not exit if same fail, but do this only once

  --sleep INT         sleep for INT seconds, try this if you think there is a race condition
                       (hints: stops a different steps, --serial works)
                       try using 5 seconds first.

  --serial            no sending jobs to queue all in one run
                          -> usually only used for testing  


### remarks 

  --phase is mandatory!! use --phase 0 for list of options

  --outname is mandatory!!




 created by Stephan Ripke 2009 at MGH, Boston, MA

";

my $phase_txt = "
 --phase options

   2 for phase 2
   3 for phase 3
   99 for AA; 
   999 for AA_CNV; 
   4 for 1KG (likelihoods)
   5 for 1KG (phased)

   10 for fin; 
   20 for uber

   15 HLA   

   -1 test HM3
   -5 test phased 1KG


   1KG:        5 -> $ref_coll[5]
   1KG-june:   6 -> $ref_coll[6]
   1KG-aug10: 7 -> $ref_coll[7]
   1KG-aug10: 8 -> $ref_coll[8]
   1KG-aug10-mars-window: 8882 -> $ref_coll[8882]
   1KG-aug10-mhc-window: 8883 -> $ref_coll[8883]


#   1KG-mar11: 88 -> $ref_coll[88]    -> so not use!
   1KG-mar11: 881 -> $ref_coll[881]

   1KG-mar11-3Mb: 882 -> $ref_coll[882]
   1KG-mar11-test2: 8821 -> $ref_coll[8821]
   1KG-mar11-3Mb-NOD2: 8822 -> $ref_coll[8822]
   1KG-mar11-3Mb-best-basam: 88222 -> $ref_coll[88222]
   1KG-mar11-3Mb-chr3-itih: 882222 -> $ref_coll[882222]
   1KG-mar11-3Mb-chr19-ncan: 882223 -> $ref_coll[882223]


   HM3_test: 333 -> $ref_coll[333]
   HM3:      334 -> $ref_coll[334]
   HM3_chr2_test: 335 -> $ref_coll[335]
   HM3_chrX: 323 -> $ref_coll[323]
   HM3_test:3333 -> $ref_coll[3333]
  HM3_SDCCA:3331 -> $ref_coll[3331]
  HM3_SDCCA_mars_window:3332 -> $ref_coll[3332]



   HLA_types: 15 -> $ref_coll[15]
   HLA_types_0813: 152 -> $ref_coll[152]
   HLA_types_0813_hg19: 1521 -> $ref_coll[1521]
   HLA_0813_HM3_hg18: 1522 -> $ref_coll[1522]
   HLA_1KG:  515 -> $ref_coll[515]



   MIR137_1KG:  511 -> $ref_coll[511]
   MIR708_1KG:  512 -> $ref_coll[512]
   HBII-108-1KG:513 -> $ref_coll[513]
   CHR10_11_1KG:514 -> $ref_coll[514]
   CHR8_9_1KG:  515 -> $ref_coll[515]
   CACNA1C_1KG: 516 -> $ref_coll[516]
   CSMD1_1KG:   517 -> $ref_coll[517]
   TCF4_1KG:    518 -> $ref_coll[518]
   CHR2_20_1KG: 519 -> $ref_coll[519]
   SCZ_TOP:     520 -> $ref_coll[520]
   SIAE:        521 -> $ref_coll[521]
   1KG_test: 555 -> $ref_coll[555]

   CNV_HM3:        4 -> $ref_coll[4]
   CNV_HM3_test: 444 -> $ref_coll[444]

   HM3:        3 -> $ref_coll[3]


   HM2:        2 -> $ref_coll[2]
   HM2:        222 -> $ref_coll[222]
   HM3_WW:    311 -> $ref_coll[311]
   HM3_FINCEUTSI: 322 -> $ref_coll[322]



   1KG-feb12-3Mb-test: 9999 -> $ref_coll[9999]

   1KG-feb12-3Mb: 9 -> $ref_coll[9]
   1KG-aug12-3Mb: 91 -> $ref_coll[91]

   1KG-aug12-3Mb_chr23: 9123 -> $ref_coll[9123]



   1KG-aug12-3Mb-1batch: 9111 -> $ref_coll[9111]
   1KG-aug12-3Mb_chr23: 91231 -> $ref_coll[91231]


   1KG-feb12-3Mb-chr23: 923 -> $ref_coll[923]
   1KG-feb12-3Mb-chr23_pseudo: 9231 -> $ref_coll[9231]



   HM3(imp2):        39 -> $ref_coll[39]
   HM3(imp2).3mb:        399 -> $ref_coll[399]




";


use Getopt::Long;
GetOptions( 


    "sjamem_incr=i"=> \$sjamem_incr,
    "info_th=f"=> \$info_th,
    "freq_th=f"=> \$freq_th,
    "bg_th=f"=> \$bg_th,
    "triset=s"=> \my $trioset_file,

    "help"=> \my $help,
    "serial"=> \my $serial,
    "sleep=i"=> \my $sleep_sw,



    "outname=s"=> \my $outname,
    "refdir=s"=> \my $refdir_str,
    "phase=i"=> \ $phas,

    "sfh=f"=> \$sec_freq,
    "fth=f"=> \$fth_th,

    "spliha_n=i"=> \$spliha_n,
    "noclean"=> \my $noclean,
    "force1"=> \my $force1,


    "popname=s"=> \$popname,
    "refiex=s"=> \my $refiex_file,

    );





if ($sleep_sw) {
  print "sleeping for $sleep_sw seconds (only use if suspect of race condition)\n";
  sleep ($sleep_sw);
}




############################################################
## testing binaries
##############################################################
my @test_scripts;


my $readref_script = "my.readref";         ### my.pipeline_tar
my $readrefsum_script = "my.readref_sum";  ### my.pipeline_tar
my $buigue_script = "buigue";              ### my.pipeline_tar
my $checkpos_script = "checkpos6";         ### my.pipeline_tar
my $checkflip_script = "checkflip4";       ### my.pipeline_tar
my $chuck_script = "my.chuck2";            ### my.pipeline_tar
my $preph_script = "my.preph";             ### my.pipeline_tar
my $imp2_script = "my.imp2.3";             ### my.pipeline_tar
my $dos_script = "haps2dos4";              ### my.pipeline_tar
my $impprob_script = "impprob_to_2dos";    ### my.pipeline_tar
my $dabg_script = "daner_bg3";             ### my.pipeline_tar
my $cobg_script = "bcomb_5_p2";            ### my.pipeline_tar
my $cobg_gw_script = "comb_bg_dir_1";      ### my.pipeline_tar
my $prune_script = "my.prune";             ### my.pipeline_tar
my $merge_script = "my.merge";             ### my.pipeline_tar
my $pseudo_script = "haps2pseudo2";        ### my.pipeline_tar
my $lift_script = "lift18219";             ### my.pipeline_tar
my $trisha_script = "trio2shape";          ### my.pipeline_tar
my $splithap_script = "splithap_1";        ### my.pipeline_tar
my $cleandir_script = "my.cleandir";       ### my.pipeline_tar
my $cleanerrandout_script = "my.cleanerrandout";  ### my.pipeline_tar
my $pdflatex_script = "pdflatex";          ### my.pipeline_tar
my $mystart_script = "my.start_job";       ### my.pipeline_tar
my $mutt_script = "mutt";                  ### my.pipeline_tar
my $du_script = "my.du";                  ### my.pipeline_tar
my $blue_script = "blueprint";         ### my.pipeline_tar



push @test_scripts, $readref_script;
push @test_scripts, $impprob_script;
push @test_scripts, $readrefsum_script ;
push @test_scripts, $buigue_script ;
push @test_scripts, $checkpos_script ;
push @test_scripts, $checkflip_script ;
push @test_scripts, $chuck_script ;
push @test_scripts, $preph_script ;
push @test_scripts, $imp2_script ;
push @test_scripts, $dos_script ;
push @test_scripts, $dabg_script ;
push @test_scripts, $cobg_script ;
push @test_scripts, $cobg_gw_script ;
push @test_scripts, $prune_script ;
push @test_scripts, $merge_script ;
push @test_scripts, $pseudo_script ;
push @test_scripts, $lift_script ;
push @test_scripts, $trisha_script ;
push @test_scripts, $pdflatex_script ;
push @test_scripts, $splithap_script ;
push @test_scripts, $cleandir_script ;
push @test_scripts, $cleanerrandout_script ;
push @test_scripts, $du_script ;
push @test_scripts,  $mystart_script;
push @test_scripts,  $blue_script;

#push @test_scripts, $mutt_script ;



print ".......testing necessary binaries....\n";
my @miss_scripts;


#my $err_scr = 0;
foreach my $scr_name (@test_scripts) {
    my $scr_path = '';
    
    for my $path ( split /:/, $ENV{PATH} ) {
	if ( -f "$path/$scr_name" && -x _ ) {
	    print "$scr_name\tfound in $path\n";
	    $scr_path = "$path/$scr_name";
	    last;
	}
    }
    if ( $scr_path eq  '') {
	push @miss_scripts, "cp /home/unix/sripke/bin/$scr_name ./\n";
	print "!!Error!! : No $scr_name command available\n" ;
    }
 
}



if (@miss_scripts > 0) {
  if (-e "get_scripts_on_broad.txt") {
    print "please remove this file and restart: get_scripts_on_broad.txt\n";
  }
  die $! unless open FILE1, "> get_scripts_on_broad.txt";
  foreach (@miss_scripts) {
    print FILE1 "$_";
  }
  close FILE1;


  print "exiting now -> have a look at get_scripts_on_broad.txt\n";
  exit;

}






print ".......testing email program....\n";

my $err_scr = 0;
{
    my $scr_path = '';
    
    for my $path ( split /:/, $ENV{PATH} ) {
	if ( -f "$path/$mutt_script" && -x _ ) {
	    print "$mutt_script\tfound in $path\n";
	    $scr_path = "$path/$mutt_script";
	    last;
	}
    }
    unless ( $scr_path ) {

	print "!!Warning!! : No $mutt_script command available, trying mail\n" ;

	$mutt_script = "mail";
	for my $path ( split /:/, $ENV{PATH} ) {
	    if ( -f "$path/$mutt_script" && -x _ ) {
		print "$mutt_script\tfound in $path\n";
		$scr_path = "$path/$mutt_script";
		last;
	    }
	}
	unless ( $scr_path ) {
	    $err_scr = 1;
	    print "!!Error!! : No $mutt_script command available\n" ;
	}
    }
 
}
die if $err_scr == 1;


print "....all necessary binaries found....\n";
print "------------------------------------\n";
#push @scripts,"id_tager_3";



#####################################
# "testing environment variable rp_perlpackages
####################################

print "testing environment variable rp_perlpackages....\n";
unless (exists $ENV{rp_perlpackages}) {
    print "Error: no environment variable for perl-packages, please re-install ricopili and make sure to follow all instructions\n";
    print "------------------------------------\n";
    exit;
}
print "....all set....\n";
print "------------------------------------\n";















my $nomega_sw = 1;



my $nomega = 0;
$nomega = 1 if ($nomega_sw);




die $usage if $help;

die $usage unless $outname;
if ($phas == -1) {
    unless ($refdir_str) {
	print "$usage\n";
	exit;
    }
}
if ($phas == 0) {
    unless ($refdir_str) {
	print "$phase_txt\n";
	exit;
    }
}



#my ($xsnp,$xchr,$xbeg,$xend);
#($xsnp,$xchr,$xbeg,$xend)= split ',', $xareastr if ($xareastr);


if ($phas == 9) {
  print "please do not use old reference any more\n";
  exit;
}

my $p2_txt = "";
if ($phas == 2 ){
    $p2_txt = "--phase2";
}


my $refdir = "";

if ($refdir_str) {
    $refdir = $refdir_str;
}
else {
    $refdir = $ref_coll[$phas];
}

unless (-d $refdir) {
    print "reference directory ($refdir) is not existing\n";
    exit;
}


my $impute_dir = "pi_sub";


#my $postimp_dir = "$impute_dir/postimp_data";







sub fisher_yates_shuffle {
    my $deck = shift;  # $deck is a reference to an array
    my $i = @$deck;
    while ($i--) {
	my $j = int rand ($i+1);
	@$deck[$i,$j] = @$deck[$j,$i];
    }
}


#####################################
# print array to file
####################################

sub a2file {
    my ($file, @lines)=@_;
    die $! unless open FILE, "> $file";
    foreach (@lines){
	print FILE $_;
    }
    close FILE;
}


###################################################
###  system call with test if successfull
###################################################
sub mysystem(){
    my ($systemstr)="@_";
    system($systemstr);
    my $status = ($? >> 8);
    die "$systemstr\n->system call failed: $status" if ($status != 0);
}


##########################################
# subroutine to split a plink-output-line
##########################################

sub split_line {
    my ($line)=@_;
    chomp($line);
    $line =~ s/^[\s]+//g;
    my @cols=  split /\s+/, $line;
}

##########################################
# subroutine to split a plink-output-line with references
##########################################

sub split_line_ref {
    my ($line)=${$_[0]};
    chomp($line);
    $line =~ s/^[\s]+//g;
    my @cols=  split /\s+/, $line;
    \@cols;
}





#####################################
# print array to file with newline
####################################

sub a2filenew {
    my ($file, @lines)=@_;
    die $! unless open FILE, "> $file";
    foreach (@lines){
	print FILE "$_\n";
    }
    close FILE;
}


#####################################
# append array to file with newline
####################################

sub a2filenew_app {
    my ($file, @lines)=@_;
    die "$!: $file" unless open FILE, ">> $file";
    foreach (@lines){
	print FILE "$_\n";
    }
    close FILE;
}

#####################################
# subroutine to count lines of a file
#####################################

sub count_lines {
    my ($file)=@_;
    my $lc=0;
    die "$file: ".$! unless open FILE, "< $file";
    while (<FILE>){
	$lc++;
    }
    close FILE;
    $lc;
}



#####################################
# subroutine to re-invoke this script
#####################################

sub reinvo_b {
    my ($message, $wt_file)=@_;
    my $now = localtime time;
    my $old_cmd = `tail -3 $loloc/impute_dir_info | head -1`;

    my $message_part = $info_txt."\t$message";
    $message = $info_txt."\t$message\t$now";

    &a2filenew_app("$loloc/impute_dir_info",$message);
    die "2 times already" if ($old_cmd =~ /$message_part/);
    chdir "$rootdir" or die "something strange";
    if ($qloc eq "bsub") {
	$wt_file =~ s/.*blueprint_joblist_file-//;
    }

    my $sys_re = "$blue_script --njob $job_bn_th -b \"$command_line\" --wa 4 --di -j --fwt $wt_file --na _if_$outname";
#    print "$sys_re\n";
    &mysystem ($sys_re);
    exit;

}


#####################################
# send jobs to cluster and also send navi again
#####################################

my $sjadir = "";
my $sjaweek = 0;
my $sjaname = "";
my $sjarow = "";
my @sjaarray;
my $sjamem = 0;
my $sjamaxpar = 0;

my $sjatime = -1;
my $sjamaxjobs = 30000;
my $sjainfofile = "$loloc/impute_dir_info";
unless (-e $sjainfofile) {
    print "log-file ($sjainfofile) is not existing\n";
    print "please check loloc in ~/ricopili.conf\n";
    exit;
}
#my $sjainfofile = "$homedir/impute_dir_info_35_test";
my $sjainfotxt = "";
my $sjamulti = 0;


sub send_jobarray {

    die "send_jobarray with undefined variables, dir" if ($sjadir eq "");
    die "send_jobarray with undefined variables, name" if ($sjaname eq "");
    die "send_jobarray with undefined variables, array" if (@sjaarray == 0);
    die "send_jobarray with undefined variables, mem" if ($sjamem == 0);
    die "send_jobarray with undefined variables, time" if ($sjatime < 0);
    die "send_jobarray with undefined variables, info" if ($sjainfotxt eq "");


    my $now = localtime time;
    $now =~ s/ /_/g;


    if ($sjaname eq "finished") {

	my $fini_message ;
	$fini_message .= "\n\n##################################################################\n";
	$fini_message .= "##### CONGRATULATIONS: \n";
	$fini_message .= "##### rp_pipeline finished successfully:\n";
	$fini_message .= "##### $sjainfotxt\n";
	$fini_message .= "##### now start with PCA (see README in subdir pcaer_sub/)\n";
	$fini_message .= "##### or directly with postimputation analysis\n";
	$fini_message .= "##### have a look at the wiki page\n"; 
	$fini_message .= "##### https://sites.google.com/a/broadinstitute.org/ricopili/\n";
	$fini_message .= "##################################################################\n";
	print "$fini_message\n";

	
	die $! unless open SUC, "> success_file";
	print SUC $fini_message."\n";
	close SUC;

	&mysystem ('cat success_file | '.$mutt_script.' -s RP_pipeline_finished '.$email) ;

	my $sjarow      = $sjainfotxt."\t$sjaname\t$now";
	&a2filenew_app("$sjainfofile",$sjarow);


	exit;

    }


    chdir ($sjadir);
    my $jobfile = "$sjaname.job_list";
    while (-e $jobfile) {
	$jobfile .= ".s";
	if (@sjaarray < 200) {
	    $sjatime = 4;
	}
    }


    
    &a2filenew ($jobfile, @sjaarray);

#    print "$jobfile\n";
#    exit;

    my $nsja = @sjaarray;

    my $nsja_loc = $nsja;
    if ($nsja_loc > 30000) {
	$nsja_loc = 30000;
    }

    my $multi_txt = "";
    if ($sjamulti > 0) {
	$multi_txt = "--multi $nsja_loc,$sjamulti";
    }

    ### with array
    $sjamem = $sjamem + $sjamem_incr;



    my $sja_week_str = "";
    if ($sjaweek > 0) {
	$sja_week_str = "--week 1";
    }



    
    if ($serial) {
	print "starting step $sjaname with ".@sjaarray." jobs\n";
	print "please be patient.\n";
	my $jc = 1;
	foreach (@sjaarray) {
	    print "running job $jc...\n";
	    &mysystem($_);
	    $jc++;
	    
	}
    }
    else { 
	my $sys_loc = "$blue_script $sja_week_str --maxpar $sjamaxpar --noerr --njob $nsja_loc --array $jobfile --wa $sjatime --mem $sjamem --j --na $sjaname.$outname $multi_txt";
#    print "$sys_loc\n";
#    exit;

	
	&mysystem ($sys_loc);
    }
#    exit;


    my $old_cmd = `tail -1 $sjainfofile | head -1`;

    my $nsja_txt = sprintf "%06d",$nsja;

    my $sjacontent = "$sjaname.".$nsja_txt;

    my $sjarow_part = $sjainfotxt."\t$sjacontent";
    my $sjarow      = $sjainfotxt."\t$sjacontent\t$now";
#    $message = $info_txt."\t$message\t$now";

    &a2filenew_app("$sjainfofile",$sjarow);

    if ($old_cmd =~ /$sjarow_part/){
	unless ($force1 ){
	    my $err_message ;
	    $err_message .= "##################################################################\n";
	    $err_message .= "##### Error: \n";
	    $err_message .= "##### step $sjaname has been done repeatedly without any progress\n";
	    $err_message .= "##### imputation pipeline stopped: $command_line\n";
	    $err_message .= "##### $sjainfotxt\n";
	    $err_message .= "##### if reason does not appear obvious\n";
	    $err_message .= "##### have a look at the wiki page\n"; 
	    $err_message .= "##### https://sites.google.com/a/broadinstitute.org/ricopili/\n";
	    $err_message .= "##### or contact the developers\n";
	    $err_message .= "##################################################################\n";
	    print "$err_message\n";

	    die $! unless open ERR, "> error_file";
	    print ERR $err_message."\n";
	    close ERR;


	    &mysystem ('cat error_file | '.$mutt_script.' -s RP_pipeline_error '.$email) ;

	    unless ($serial) {
		exit;
	    }

	}

    }


    $command_line =~ s/--force1//;


    my $wt_file = "$sjadir/blueprint_joblist_file-$sjaname.$outname";
    chdir "$rootdir" or die "something strange";
    if ($qloc eq "bsub") {
	$wt_file =~ s/.*blueprint_joblist_file-//;
    }

    if ($qloc eq "slurm") {
	$wt_file = "$sjadir/$jobfile.script.id";
    }

    if ($qloc eq "qsub") {
	$wt_file = "$sjadir/j.$sjaname.$outname.id";
    }
    if ($qloc eq "qsub_c") {
	$wt_file = "$sjadir/j.$sjaname.$outname.id";
    }
    if ($qloc eq "qsub_b") {
	$wt_file = "$sjadir/j.$sjaname.$outname.id";
    }
    


    if ($serial) {
	my $sys_re = "$command_line";
	&mysystem ($sys_re);
	exit;
    }
    else {
	my $sys_re = "$blue_script --njob $job_bn_th -b \"$command_line\" --wa 2 --di -j --fwt $wt_file --na _if_$outname";
	&mysystem ($sys_re);
    }



    print "------------------------------------------------------------\n";
    print "$nsja jobs successfully submitted\n";
    print "please see tail of $sjainfofile for regular updates\n";
    print "also check bjobs -w for running jobs\n";
    print "possibly differnt command on different computer cluster: e.g. qstat -u USER\n";
    print "you will be informed via email if errors or successes occur\n";
    print "------------------------------------------------------------\n";

    exit;


}




#####################################
# subroutine to re-invoke this script
#####################################

sub reinvo_b_week {
    my ($message, $wt_file)=@_;
    my $now = localtime time;
    my $old_cmd = `tail -3 $loloc/impute_dir_info | head -1`;

    my $message_part = $info_txt."\t$message";
    $message = $info_txt."\t$message\t$now";

    &a2filenew_app("$loloc/impute_dir_info",$message);
    die "2 times already" if ($old_cmd =~ /$message_part/);
    chdir "$rootdir" or die "something strange";
    if ($qloc eq "bsub") {
	$wt_file =~ s/.*blueprint_joblist_file-//;
    }

    &mysystem ("$blue_script --week 1 --njob $job_bn_th -b \"$command_line\" --wa 10 --di -j --fwt $wt_file --na _if_$outname");
    exit;

}



##############################################
##############################################
#############  BEGIN
##############################################
##############################################


use Cwd;
use File::Path;
$rootdir = &Cwd::cwd();
$sjainfotxt = "$rootdir\t$command_line";






my $pcaer_dir = "$rootdir/pcaer_sub";
my $archive_dir = "/archive/gwas/scz/archive/$outname";


#print "to archive it back:\n";
#print "rsync -ave ssh gwas\@lisa.sara.nl:$archive_dir/pi_*.tar.gz $impute_dir/ \n";

#exit;

unless (-e $impute_dir){
    print "impute_dir is not existing, create one for you\n";
    my @created = mkpath(   ## $created ?
			    $impute_dir,
			    {verbose => 0, mode => 0750},
	);
}




unless (-e $pcaer_dir){
    print "pcaer_dir is not existing, create one for you\n";
    my @created = mkpath(   ## $created ?
			    $pcaer_dir,
			    {verbose => 0, mode => 0750},
	);
}



#unless (-e $archive_dir){
#    print "$archive_dir\n";
#    exit;
#    print "archive_dir is not existing, create one for you\n";
#    my @created = mkpath(   ## $created ?
##			    $archive_dir,
#			    {verbose => 0, mode => 0750},
#	);
#}

#exit;

#####################################
# create suminfo if not existing
#####################################

#unless (-e "$refdir/$suminfo_s"){
#    print "WARING: $refdir/$suminfo_s not existing\n";
#    chdir ($refdir);
#    &mysystem ("cat *.info_pos  | grep -v SNP > $suminfo");
#    &mysystem ("sort -k1,1 -u $suminfo > $suminfo_s");
#    chdir ($rootdir);
#}

#####################################
## if new frequency file is existing
###################################
if (-e "$refdir/sumfrq.$popname") {
    $suminfo_s = "sumfrq.$popname";
}
else {

    my $popname_uc = uc($popname);
    if (-e "$refdir/sumfrq.$popname_uc") {
	$suminfo_s = "sumfrq.$popname_uc";
    }
    else {
	print "$refdir/sumfrq.$popname_uc in refdir is not existing!!!\n";
	die;
#	sleep(10);
    }
}

#print $refdir."\n";
#print $suminfo_s."\n";
#exit;


unless (-e "$refdir/$suminfo_n"){
    print "ERROR: $refdir/$suminfo_n not existing\n";

#    chdir ($refdir);
#    &mysystem ("wc -l *.info_pos > $suminfo_n");
#    chdir ($rootdir);
}


unless (-e "$refdir/$suminfo_r"){
    print "ERROR: $refdir/$suminfo_r not existing\n";
    die;
#    chdir ($refdir);
#    &mysystem ("ls sc_*.bgl > $suminfo_r");
#    chdir ($rootdir);
}

#my @refallfiles;

#    opendir(DIR, "$refdir") || die "can't opendir .: $!";
#    @refallfiles = readdir(DIR);
#    closedir DIR;
#}
my $cc=0;



my %refiex;
if ($refiex_file) {
    print "read $refiex_file\n";
    die $!." <$refiex_file>" unless open IN, "< $refiex_file";
    while (my $line = <IN>){
	chomp($line);
	$refiex{$line} = 1;
	print "$line\n";
    }
    close IN;

}







## names of all reference chunks that will be used: the second column of
## every line in $suminfo_n with its ".info_pos" suffix removed,
## minus the chunks listed in the exclusion file (if one was given)
my @reffiles;
print "read $refdir/$suminfo_n....\n";
die $!." <$refdir/$suminfo_n>" unless open IN, "< $refdir/$suminfo_n";
while (my $line = <IN>){
    my @cells = &split_line($line);
    die "problem with $refdir/$suminfo_n" if (@cells < 2);
    my $bgl_file = $cells[1];
    ## escaped dot: only a literal ".info_pos" suffix is stripped
    $bgl_file =~ s/\.info_pos$//;

    if ($refiex_file) {

        ## reduce the chunk name to its index, chrN_start_end if available,
        ## otherwise chrN_start; this is the form looked up in %refiex
        my $refind = $bgl_file;
        if ($refind =~ /chr[0-9]*_[0-9]*_[0-9]*/){
            $refind =~ s/.*(chr[0-9]*_[0-9]*_[0-9]*).*/$1/;
        }
        else {
            $refind =~ s/.*(chr[0-9]*_[0-9]*).*/$1/;
        }

        if (exists $refiex{$refind}){
            print "exclude: $bgl_file\n" ;
            next;
        }

    }

    ## "total" is not a chunk (presumably the summary line of "wc -l" -- confirm)
    next if ($bgl_file eq "total");

    push @reffiles, $bgl_file;
    $cc++;
}
close IN;
die "reference directory <$refdir> empty (no sc.*bgl)" if (@reffiles == 0);



#exit;
#print "sleep\n";
#sleep(5);


## directory listing of the current working directory
my @files = ();
opendir(DIR, ".") || die "can't opendir .: $!";
@files = readdir(DIR);
closedir DIR;

## directory listing of the imputation subdirectory;
## stays empty once the marker file puting_done exists
my @pi_files = ();

unless (-e "$rootdir/puting_done") {
    ## report the directory that actually failed (the message used to say ".")
    opendir(DIR, "$impute_dir") || die "can't opendir $impute_dir: $!";
    @pi_files = readdir(DIR);
    closedir DIR;
}


### read bim-files
my @bim_files = grep {/bim$/} @files;

## input datasets must not already carry the .hg19.ch.fl suffix: the first
## offender is renamed to <prefix>.bf.{fam,bed,bim}, the commands to undo the
## renaming are printed, and the script stops
foreach my $bim (@bim_files) {
    next unless ($bim =~ /.hg19.ch.fl.bim$/);

    print "wrong filename, will rename:\n";
    my $old_prefix = $bim;
    $old_prefix =~ s/.bim$//;
    my $new_prefix = $old_prefix;
    $new_prefix =~ s/.hg19.ch.fl/.bf/;

    print "mv $old_prefix.bed/bim/fam $new_prefix.bed/bim/fam\n";
    foreach my $ext ("fam", "bed", "bim") {
        &mysystem ("mv $old_prefix.$ext $new_prefix.$ext");
    }

    print "to redo\n";
    foreach my $ext ("bim", "fam", "bed") {
        print "mv $new_prefix.$ext $old_prefix.$ext\n";
    }

    exit;
}

#print "sleep\n";
#sleep(10);

## sort the entries of the imputation subdirectory into the intermediate
## file classes used as "already done" lookups by the following stages
my @bimfli_files = ();
my @bimpos_files = ();
my @bimref_files = ();
my @bimhg19_files = ();
foreach my $entry (@pi_files) {
    push @bimfli_files, $entry if ($entry =~ /.ch.fl.bim$/);
    push @bimpos_files, $entry if ($entry =~ /.ch.bim$/);
    push @bimref_files, $entry if ($entry =~ /.bim.ref/);
    push @bimhg19_files, $entry if ($entry =~ /.hg19.bim$/);
}

## with puting_done present the subdirectory was not read (@pi_files is empty),
## so the flipped file names are derived from the input bim files instead
if (-e "$rootdir/puting_done") {
    foreach my $bim (@bim_files) {
        my $derived = $bim;
        $derived =~ s/.bim$//;
        push @bimfli_files, $derived.".hg19.ch.fl.bim";
    }
}

print "@bimhg19_files\n";
#print "debug\n";
#sleep(10);
#exit;


### lookup of flipped bim-files (.ch.fl.bim)
my %bimfli_array = map { $_ => 1 } @bimfli_files;

### lookup of position-checked bim-files (.ch.bim)
my %bimpos_array = map { $_ => 1 } @bimpos_files;

### lookup of reference-alignment files (.bim.ref*)
my %bimref_array = map { $_ => 1 } @bimref_files;

### lookup of lifted bim-files (.hg19.bim)
my %bimhg19_array = map { $_ => 1 } @bimhg19_files;



## name for log-files: first flipped bim file, or the first input bim file
## if there is none; ".bim" suffix and "qc2report_" are removed
$iname = $bimfli_files[0];
if ($iname eq "") {
    $iname = $bim_files[0];
}
$iname =~ s/.bim$//;
$iname =~ s/qc2report_//;

#####################################
# prepare pi_subdir: from here on the script works inside $impute_dir;
# the input bed/bim/fam files are linked in from $rootdir where missing
#####################################

chdir ($impute_dir);

if (!(-e "$rootdir/puting_done")) {
    foreach my $bim (@bim_files) {
        my $prefix = $bim;
        $prefix =~ s/.bim$//;
        foreach my $ext ("bim", "bed", "fam") {
            &mysystem("ln -s $rootdir/$prefix.$ext .") unless (-e "$prefix.$ext");
        }
    }
}


##########################################
## prepare trioset file
############################################

## dataset prefixes named in the trioset file, each with ".hg19.ch.fl" appended
my %trioset;
## those entries of %trioset that also occur in @bimfli_files
my %trioset_bimfli;
if ($trioset_file){
    die $!." <$rootdir/$trioset_file>" unless open IN, "< $rootdir/$trioset_file";
    while (my $line = <IN>){
	my @cells = &split_line($line);
	## first column names the dataset; a trailing .bim or .fam is dropped
	## before the suffix of the flipped files is appended
	$cells[0] =~ s/.bim$//;
	$cells[0] =~ s/.fam$//;
	$cells[0] .= ".hg19.ch.fl";
	$trioset{$cells[0]} = 1;
	print "$cells[0] is triodata\n";
    }
    close IN;
}
#exit;

## NOTE: $bifi is an alias of the array element, so the substitution below
## strips ".bim" from the entries of @bimfli_files themselves. After this loop
## the array holds prefixes; %bimfli_array (built earlier) still has the
## full file names as keys. Do not "fix" this without checking later users.
foreach my $bifi (@bimfli_files) {
    $bifi =~ s/.bim$//;
    if (exists $trioset{$bifi}){
	$trioset_bimfli{$bifi} = 1;
    }
}


#######################################################
## set a single chromosome
######################################################

#my $chr_start=1;	
#my $chr_end=22;	
#if ($chr !=0 ){
#    $chr_start = $chr;
#    $chr_end = $chr;
#}




################################################
### set info text
####################################################


## tab-separated run description: the command line as invoked and the root directory
$info_txt = "command:\t\"$command_line\"\tdir:\t$rootdir";



#####################################################
## check readref
########################################################


## switch for the per-chromosome reference alignment: stays 1 only if a
## <suminfo_s>.<chr>.gz file exists in refdir for every chromosome 1..22;
## the search stops at the first missing file
my $readref_sw = 1;

CHROMOSOME: for my $chr_no (1..22) {
    my $per_chr_ref = "$refdir/$suminfo_s.$chr_no.gz";
    next CHROMOSOME if (-e $per_chr_ref);

    print "Warning: $per_chr_ref is not existing, it's ok if using older reference\n";
    $readref_sw = 0;
    last CHROMOSOME;
}

if ($readref_sw != 1) {
    print "efficient reference alignment switched off, please check refdir, will continue in 3 sec...\n";
    sleep(3);
}
else {
    print "efficient reference alignment switched on\n";
}
#print "exit;\n";
#exit;



###################################
### GUESS BUILD
###################################

## one build-guess job per input dataset that has no <prefix>.hg19.bim yet;
## the whole stage is skipped once buigue_done or posing_done exists
my @buigue_arr = ();
my $buigue_fini = 0;

if (!(-e "$rootdir/buigue_done") && !(-e "$rootdir/posing_done")) {

    foreach my $bim (@bim_files) {
        my $prefix = $bim;
        $prefix =~ s/.bim$//;

        if (exists $bimhg19_array{"$prefix.hg19.bim"}) {
            $buigue_fini++;
        }
        else {
            push @buigue_arr, "$buigue_script --lift19 $prefix.bim";
        }
    }

    if (@buigue_arr == 0) {
        ## nothing left to do: set the marker file
        &mysystem ("touch $rootdir/buigue_done");
        print "build_guess done\n";
    }
    else {
        $sjadir = $impute_dir;
        $sjaname = "buigue";

        $sjatime = 2;

        $sjamem = 3000;
        @sjaarray = @buigue_arr;

        &send_jobarray;
    }
}


###################################
### READREF
###################################



## READREF: one job per dataset and chromosome (1..22) for which no
## <prefix>.hg19.bim.ref.chr<N> file was found in the imputation subdirectory.
## Both sub-stages run only if all per-chromosome reference files exist
## ($readref_sw == 1).
my @readref_arr = ();
my $readref_fini = 0;   ## number of dataset/chromosome pairs already done

if ($readref_sw == 1) {
    unless (-e "$rootdir/readref_done") {
        foreach my $bimfile (@bim_files) {
            my $bfile = $bimfile;
            $bfile =~ s/.bim$//;
            my $accfli ="$bfile".".hg19.bim";

            foreach my $chrloc (1..22) {
                my $bimref ="$accfli".".ref.chr$chrloc";
                my $reffi ="$refdir/$suminfo_s.$chrloc.gz";
                unless (exists $bimref_array{$bimref}) {
                    push @readref_arr, "$readref_script --chr $chrloc --ref $reffi $accfli";
                }
                else {
                    ## was a bare "$readref_fini;" (a no-op), so the
                    ## counter never moved
                    $readref_fini++;
                }
            }
        }

        if (@readref_arr > 0) {

            $sjadir = $impute_dir;
            $sjaname = "readref";
            $sjatime = 2;

            $sjamem = 3000;
            $sjamaxpar = 100;
            @sjaarray = @readref_arr;

            &send_jobarray;
        }
        else {
            &mysystem ("touch $rootdir/readref_done");
            print "readref done\n";
        }
    }


###################################
### sum readref
###################################

    ## one summing job per dataset without a <prefix>.hg19.bim.ref.sum.done
    ## marker (the marker test used to be written twice, nested)
    unless (-e "$rootdir/readrefsum_done") {

        my @readrefsum_arr = ();
        my $readrefsum_fini = 0;

        foreach my $bimfile (@bim_files) {
            my $bfile = $bimfile;
            $bfile =~ s/.bim$//;
            my $accfli ="$bfile".".hg19.bim";
            my $bimref_done ="$accfli".".ref.sum.done";

            unless (exists $bimref_array{$bimref_done}) {
                push @readrefsum_arr, "$readrefsum_script $accfli";
            }
            else {
                $readrefsum_fini++;
            }
        }

        if (@readrefsum_arr > 0) {

            $sjadir = $impute_dir;
            $sjaname = "reresum";
            $sjatime = 2;

            $sjamem = 1000;
            @sjaarray = @readrefsum_arr;

            &send_jobarray;
        }
        else {
            &mysystem ("touch $rootdir/readrefsum_done");
            print "readrefsum done\n";
        }
    }
}


#exit;



###################################
### CHECKPOS
##################################################
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
### checkpos6 needs the var_chr_renaming (see my.readref) -> done
###################################

## one position-check job per dataset that has no <prefix>.hg19.ch.bim yet
my @chepos_arr = ();
my $chepos_fini = 0;

if (!(-e "$rootdir/posing_done")) {

    foreach my $bim (@bim_files) {
        my $prefix = $bim;
        $prefix =~ s/.bim$//;
        my $local_ref = $prefix.".hg19.bim.ref.sum";
        my $pos_checked = $prefix.".hg19.ch.bim";

        ## the job runs against the dataset-specific reference summary if
        ## that file exists, otherwise against the complete frequency file
        my $job;
        if (-e $local_ref) {
            print "locref $local_ref is existing! safes some time\n";
            $job = "$checkpos_script --dbcol 1,2,3 --dbsnp $rootdir/$impute_dir/$local_ref $prefix.hg19.bim";
        }
        else {
            print "locref $local_ref is not existing! would be better if it did\n";
            $job = "$checkpos_script --dbcol 1,8,9 --dbsnp $refdir/$suminfo_s $prefix.hg19.bim";
        }

        if (exists $bimpos_array{$pos_checked}) {
            $chepos_fini++;
        }
        else {
            push @chepos_arr, $job;
        }
    }

    if (@chepos_arr == 0) {
        &mysystem ("touch $rootdir/posing_done");
        print "checkpos done\n";
    }
    else {
        $sjadir = $impute_dir;
        $sjaname = "chepos";
        $sjatime = 2;
        $sjamem = 3000;
        @sjaarray = @chepos_arr;

        &send_jobarray;
    }
}

#exit;

###################################
### CHECKFLIP
###################################

#print "checkflip3?\n";
#exit;

## checkflip jobs still to be run, one per dataset without <prefix>.hg19.ch.fl.bim
my @chefli_arr = ();
## number of datasets whose flipped bim file is already present
my $chefli_fini = 0;

    ## the whole stage is skipped once the marker file flipping_done exists
    unless (-e "$rootdir/flipping_done") {
	foreach (@bim_files) {
	    my $bfile = $_;
	    $bfile =~ s/.bim$//;
	    my $accfli ="$bfile".".hg19.ch.fl.bim";
	    ## NOTE(review): the CHECKPOS stage looks for "<prefix>.hg19.bim.ref.sum",
	    ## here the name is "<prefix>.bim.ref.sum" (no ".hg19"). If no such file
	    ## is ever written, the first branch below is never taken -- confirm
	    ## which name is intended before changing it.
	    my $locref = $bfile.".bim.ref.sum";

	    if (-e $locref) {
		print "locref $locref is existing! safes some time\n";
		unless (exists $bimfli_array{$accfli}) {
		    ## job against the dataset-specific reference summary
		    my $systmp = "$checkflip_script --dbcol 0,3,4,5 --fth $fth_th --sfh $sec_freq --info $rootdir/$impute_dir/$locref $bfile.hg19.ch.bim" ;
		    push @chefli_arr, $systmp ;
#		    print "$systmp\n";
#		    exit;
#		    push @chepos_arr, "$checkpos_script --dbcol 1,2,3 --dbsnp $rootdir/$impute_dir/$locref $bfile.hg19.bim" ;#
		    
		}
		else {
		    $chefli_fini++;
		}
	    }
	    else {
		unless (exists $bimfli_array{$accfli}) {
		    ## job against the complete frequency file of the reference
		    push @chefli_arr, "$checkflip_script --fth $fth_th --sfh $sec_freq --info $refdir/$suminfo_s $bfile.hg19.ch.bim" ;
		}
		else {
		    $chefli_fini++;
		}
	    }


	}

#	exit;

	## submit the outstanding jobs, or set the marker file if none are left
	if (@chefli_arr > 0) {
	    
	    $sjadir = $impute_dir;
	    $sjaname = "chefli";
	    $sjatime = 2;
#	    $sjatime = 4 if ($chefli_fini > 0);
	    $sjamem = 3000;
	    @sjaarray = @chefli_arr;
	    
	    &send_jobarray;
	    
	}
	else {
	    &mysystem ("touch $rootdir/flipping_done");
	    print "checkflip done\n";
	}
    }



#exit;



###############################################################################
#### read reference info
###########################################################################
## first column of every line in $suminfo_n, keyed by the second column;
## not filled once puting_done exists
my %info_n;
if (!(-e "$rootdir/puting_done")) {

    print "read reference_info into memory\n";
    open(IN, "< $refdir/$suminfo_n") or die $!." <$refdir/$suminfo_n>";

    while (defined(my $row = <IN>)) {
        my ($value_col, $key_col) = (&split_line($row))[0, 1];
        $info_n{$key_col} = $value_col;
    }

    close IN;
}





###########################
#### here preparation of famfiles for shapeit
############################


print "prepare famfiles for shapeit\n";

## For every input dataset whose flipped fam file exists, write three files
## (skipped if <prefix>.hg19.ch.fl.fam.idnum is already there):
##   .fam.idnum  - fam file with individuals renumbered 1..N
##   .fam.transl - translation table: number, original family ID, original individual ID
##   .fam.n      - number of individuals
unless (-e "$rootdir/puting_done") {
    foreach (@bim_files) {

	my $bprefix = $_;
	$bprefix =~ s/.bim$//;
	## sex code per individual, keyed by "<column 1>\t<column 2>" of the .sexcheck file
	my %sex_hash = ();

	### include sex check for chrX
	if ($phas == 923 || $phas == 9123 || $phas == 91231) {
	    if (-e "$bprefix.hg19.ch.fl.fam") {
		## run plink --check-sex only if its output is not there yet
		unless (-e "$bprefix.hg19.ch.fl.sexcheck") {
		    my $sx = "$ploc/plink --memory 2000  --bfile $bprefix.hg19.ch.fl --check-sex --out $bprefix.hg19.ch.fl";
		    &mysystem ($sx);
		}
		
		
		die $! unless open SI, "< $bprefix.hg19.ch.fl.sexcheck";
		while (my $line = <SI>){
		    my @cells = &split_line($line);
		    ## sixth column below .5 gives code 2, otherwise code 1
		    ## (presumably plink's F estimate and female/male -- confirm);
		    ## NOTE(review): a header line, if present, is parsed like data
		    if ($cells[5] < .5) {
			$sex_hash{"$cells[0]\t$cells[1]"} = 2;
		    }
		    else {
			$sex_hash{"$cells[0]\t$cells[1]"} = 1;
		    }
		}
		close SI;
	    }
	}
	
	if (-e "$bprefix.hg19.ch.fl.fam"){ 
	    unless (-e "$bprefix.hg19.ch.fl.fam.idnum") {
		## NOTE(review): the existence test is on the flipped fam file,
		## but the individuals are read from the original <prefix>.fam
		die $! unless open FI, "< $bprefix.fam";
		die $! unless open FO, "> $bprefix.hg19.ch.fl.fam.idnum.tmp";
		die $! unless open FT, "> $bprefix.hg19.ch.fl.fam.transl";
		## running number of the individual (shadows the file-level $cc)
		my $cc = 1;
		while (my $line = <FI>){
		    my @cells = &split_line($line);
		    ## family ID and individual ID are both replaced by the running number
		    print FO "$cc $cc"; 

		    ## columns 3 and 4 are always written as 0
		    print FO " 0"; 
		    print FO " 0"; 

#		    print FO " $cells[2]"; 
#		    print FO " $cells[3]"; 
		    ## sex: value from the sex check if there is one, otherwise column 5 of the fam file
		    if (exists $sex_hash{"$cells[0]\t$cells[1]"}){
			print FO " ".$sex_hash{"$cells[0]\t$cells[1]"}; 
		    }
		    else {
			print FO " $cells[4]"; 
			## for the $phas values above every individual must have a sex-check entry
			if ($phas == 923 || $phas == 9123 || $phas == 91231) {
			    print "Error: no sex-check on X-chr?\n";
			    die;
			}
		    }
		    
		    ## column 6 of the fam file is copied unchanged
		    print FO " $cells[5]\n";
		    print FT "$cc"; 
		    print FT " $cells[0]";
		    print FT " $cells[1]\n";
		    $cc++;
		}
		close FI;
		close FO;
		close FT;
		## $cc is one past the last individual written
		my $nloc = $cc -1;
		die $! unless open FN, "> $bprefix.hg19.ch.fl.fam.n";
		print FN $nloc."\n";
		close FN;
		## the idnum file appears under its final name only after it is complete
		&mysystem ("mv $bprefix.hg19.ch.fl.fam.idnum.tmp $bprefix.hg19.ch.fl.fam.idnum");
	    }
	}
    }
}

#exit;

############################################################
### refind array
#############################################################

## parallel arrays, one entry per element of @reffiles (same order)
my @refind_arr;   ## chunk index: chrN_start_end, or chrN_start
my @refstart_arr; ## second "_"-separated field of the chunk index
my @refend_arr;   ## third "_"-separated field of the chunk index (undef if absent)

my @refchr_arr; ## saves the chromosomes in same order
my @gema_arr; ## saves the gema_files in same order
my %gema_hash; ## chromosomes whose genetic map file has been checked already

foreach my $rf (@reffiles) {

    ## cut the chunk index out of the reference file name
    ## ($1 instead of the grandfathered \1 in the replacement)
    my $refind = $rf;
    if ($refind =~ /chr[0-9]*_[0-9]*_[0-9]*/){
        $refind =~ s/.*(chr[0-9]*_[0-9]*_[0-9]*).*/$1/;
    }
    else {
        $refind =~ s/.*(chr[0-9]*_[0-9]*).*/$1/;
    }

    push @refind_arr, $refind;

    my @tcells = split /_/, $refind;
    my $mega_start = $tcells[1];
    my $mega_end = $tcells[2];

    my $chrind = $tcells[0];
    $chrind =~ s/chr//;

    push @refchr_arr, $chrind;
    push @refstart_arr, $mega_start;
    push @refend_arr, $mega_end;

    my $gema_file = "$refdir/genetic_map_chr$chrind"."_combined_b37.txt";
    push @gema_arr, $gema_file;

    ### test existence once per chromosome
    unless (exists $gema_hash{$chrind}) {
        $gema_hash{$chrind} = 1;
        die "$gema_file not existent" unless (-e $gema_file);
    }

}


#############################################
### write out files for postimp
#############################################

## reference_info: the reference directory on the first line, then one chunk
## index per line; written to a temporary file and moved into $rootdir
if (!(-e "$rootdir/reference_info")) {
    open(REF, "> reference_info.tmp") or die $!;
    foreach my $entry ($refdir, @refind_arr) {
        print REF "$entry\n";
    }
    close REF;
    &mysystem ("mv reference_info.tmp $rootdir/reference_info");
}



#    unless (-e "$rootdir/datasets_info") {
#	die $! unless open BF, "> datasets_info.tmp";
#	foreach (@bimfli_files) {
#	    print BF "$_\n";
#	}
#	close BF;
#	&mysystem ("mv datasets_info.tmp $rootdir/datasets_info");
 #   }




############################################################
### chucking
#############################################################


## CHUCKING: one job per dataset and reference chunk that has no
## plink.<dataset>.<chunk>.fini marker in subbfile_<dataset> yet
my $chucking_done = 0;
if (-e "$rootdir/chucking_done") {
    $chucking_done = 1;
}


my @chuck_arr = ();
my $chuck_fini = 0;   ## number of dataset/chunk pairs already finished
if ($chucking_done == 0) {
    ### loop for datasets
    foreach my $bimfli (@bimfli_files) {
        my $bprefix = $bimfli;
        $bprefix =~ s/.bim$//;

        my $subdir = "subbfile_$bprefix";
        unless (-e $subdir) {
            &mysystem ("mkdir $subdir");
        }

        my $subdir_empty = "empty_$bprefix";
        unless (-e $subdir_empty) {
            &mysystem ("mkdir $subdir_empty");
        }

        ## trio datasets get the additional --mendel flag (same for all chunks)
        my $mendel_sw = "";
        if (exists $trioset_bimfli{$bprefix}){
            $mendel_sw = "--mendel";
        }

        ## markers of the chunks that are finished already;
        ## the error message names the directory that failed (it used to say ".")
        opendir(DIR, $subdir) || die "can't opendir $subdir: $!";
        my %finifiles_hash = map { $_ => 1 } grep {/.fini$/} readdir(DIR);
        closedir DIR;


        #### loop for reference chunks
        foreach my $ccc (0..$#refind_arr) {

            my $refind = $refind_arr[$ccc];
            my $rf = $reffiles[$ccc];

            my $plink_ch_out = "$subdir/plink.$bprefix.$refind";
            my $plink_ch_out_fini = "plink.$bprefix.$refind.fini";
            my $empty_ch_out = "$subdir_empty/$bprefix.$refind";
            my $snps_extract = "$refdir/$rf.info_pos";

            unless (exists $finifiles_hash{$plink_ch_out_fini} ) {
                push @chuck_arr, "$chuck_script --out $plink_ch_out --in $bprefix --sfile $snps_extract $mendel_sw --empty $empty_ch_out";
            }
            else {
                $chuck_fini++;
            }

        }
    }


###################################
### send chuck jobs
###################################

    if (@chuck_arr > 0) {

        $sjadir = $impute_dir;
        $sjaname = "chuck";
        $sjatime = 0;
        $sjamem = 4000;
        @sjaarray = @chuck_arr;

        &send_jobarray;

    }
    else {
        &mysystem ("touch $rootdir/chucking_done");
        print "chucking done\n";
    }
}
# die "debug_chuck";



############################################################
### prephase
#############################################################


## PREPHASE: one job per dataset and non-empty reference chunk that has no
## plink.<dataset>.<chunk>.fini marker in haps_<dataset> yet. Marker files
## ending in .multi.<x> in the same directory move a chunk from the plain
## job list to one of the two multithreaded job lists.
my $prephase_done = 0;
if (-e "$rootdir/prephase_done") {
    $prephase_done = 1;
}

## value passed to the prephasing script as --spliha
my $spliha_n_2= $spliha_n * 2;


my @preph_arr = ();       ## jobs for chunks without any .multi marker
my $preph_fini = 0;       ## number of chunks already finished
my @preph_arr_mu1 = ();   ## jobs for chunks with a .multi.0 marker
my @preph_arr_mu2 = ();   ## jobs for chunks with a .multi.$multithread1 marker

if ($prephase_done == 0) {
    ### loop for datasets
    foreach my $bimfli (@bimfli_files) {
        my $bprefix = $bimfli;
        $bprefix =~ s/.bim$//;

        my $subdir = "haps_$bprefix";
        my $subdir_in = "subbfile_$bprefix";
        my $subdir_empty = "empty_$bprefix";
        unless (-e $subdir) {
            &mysystem ("mkdir $subdir");
        }

        my $trio_sw = 0;
        if (exists $trioset_bimfli{$bprefix}){
            $trio_sw = 1 ;
        }

        ## chunks marked as empty for this dataset
        ## (error messages name the directory that failed; they used to say ".")
        print "read emptydir....\n";
        opendir(IND, $subdir_empty) || die "can't opendir $subdir_empty: $!";
        my @indirempty = grep {/\.empty$/} readdir(IND);
        closedir IND;
        my %indirempty_hash = map { $_ => 1 } @indirempty;

        ## .fini markers and .multi.<x> markers share one lookup hash
        ## NOTE(review): /\.multi\..$/ accepts a single character after
        ## ".multi." only; a two-digit $multithread1 would not be found -- confirm
        print "read outdir....\n";
        opendir(DIR, $subdir) || die "can't opendir $subdir: $!";
        my @dirfiles = readdir(DIR);
        closedir DIR;
        my %finifiles_hash = map { $_ => 1 }
                             grep { /\.fini$/ || /\.multi\..$/ } @dirfiles;

        if ($trio_sw == 1) {
            unless (-e "$bprefix.fam.shape"){
                print "create shapefile for trios\n";
                my $sys = "$trisha_script $bprefix.fam";
                &mysystem ($sys);
            }
        }

        print "start refdirloop....\n";
        #### loop for reference chunks
        foreach my $ccc (0..$#refind_arr) {

            my $refind = $refind_arr[$ccc];
            my $gema_file = $gema_arr[$ccc];

            my $plink_ch_out = "$subdir_in/plink.$bprefix.$refind";
            my $empty_ch_out = "$bprefix.$refind.empty";
            my $haps_ch_out = "$subdir/plink.$bprefix.$refind";
            my $haps_ch_out_fini = "plink.$bprefix.$refind.fini";
            my $haps_ch_out_mu0 = "plink.$bprefix.$refind.multi.0";
            my $haps_ch_out_mu1 = "plink.$bprefix.$refind.multi.$multithread1";

            ## trios use the .fam.shape file instead of the renumbered fam file
            my $famfile = "$bprefix.fam.idnum";
            if ($trio_sw ==1) {
                $famfile = "$bprefix.fam.shape";
            }

            my $xtxt = "";
            if ($phas == 923 || $phas == 9123 || $phas == 91231) {
                $xtxt = "--chrX";
            }

            ## the .multi.$multithread1 marker wins if both markers are present
            my $multi_txt = "";
            my $multi_sw = 0;
            if (exists $finifiles_hash{$haps_ch_out_mu0} ) {
                $multi_txt = "--multi $multithread1";
                $multi_sw = 1;
            }
            if (exists $finifiles_hash{$haps_ch_out_mu1} ) {
                $multi_txt = "--multi $multithread2";
                $multi_sw = 2;
            }

            ## chunks marked empty are skipped without counting
            next if (exists $indirempty_hash{$empty_ch_out});

            if (exists $finifiles_hash{$haps_ch_out_fini} ) {
                $preph_fini++;
                next;
            }

            my $backbone = "$preph_script $xtxt --spliha $spliha_n_2 --out $haps_ch_out --in $plink_ch_out --fam $famfile --gema $gema_file";
            if ($multi_sw == 0) {
                push @preph_arr, $backbone;
            }
            if ($multi_sw == 1) {
                ## with qsub_b the job is resubmitted without --multi
                if ($qloc eq "qsub_b"){
                    push @preph_arr_mu1, $backbone;
                }
                else {
                    push @preph_arr_mu1, $backbone." $multi_txt";
                }
            }
            print "multi: $multi_sw\n";
            if ($multi_sw == 2) {
                ## decide from the last line of the .shape.log whether the
                ## chunk is declared empty instead of being run again
                my $lastlog = `tail -1 $haps_ch_out.shape.log`;

                ## numeric context keeps the leading line count of "wc -l"
                my $bimn = `wc -l $plink_ch_out.bim`;
                $bimn = $bimn * 1;
                print "$lastlog: $bimn\n";
                if ($lastlog =~ /fully missing individuals/) {
                    if ($bimn < 20) {
                        print "setting empty: $bprefix.$refind.empty\n";
                        &mysystem ("touch $subdir_empty/$bprefix.$refind.empty");
                    }
                    else {
                        ## 20 or more SNPs: also leave an .error.nsnps marker
                        print "Warning setting empty: $bprefix.$refind.empty\n";
                        &mysystem ("touch $subdir_empty/$bprefix.$refind.empty");
                        &mysystem ("touch $subdir_empty/$bprefix.$refind.empty.error.nsnps");
                    }
                }
                else {
                    push @preph_arr_mu2, $backbone." $multi_txt";
                }
            }
        }
    }


###################################
### send prephase jobs
###################################

    if (@preph_arr > 0) {

        $sjadir = $impute_dir;
        $sjaname = "preph";
        $sjatime = 2;
        $sjamem = 4000;
        @sjaarray = @preph_arr;

        &send_jobarray;

    }


    if (@preph_arr_mu1 > 0) {

        $sjadir = $impute_dir;
        $sjaname = "preph_mu1";
        $sjatime = 20;
        unless ($qloc eq "qsub_b") {
            $sjamulti = $multithread1;
        }
        $sjamem = 16000;
        @sjaarray = @preph_arr_mu1;

        &send_jobarray;

    }


    if (@preph_arr_mu2 > 0) {


        if ($qloc eq "qsub_b") {
            print "Error: multithread currently not supported at Broad\n";
            exit;
        }
        $sjadir = $impute_dir;
        $sjaname = "preph_mu2";
        $sjatime = 4;
        $sjaweek = 1;
        $sjamulti = $multithread2;
        $sjamem = 4000;
        @sjaarray = @preph_arr_mu2;

        &send_jobarray;

    }
    elsif (@preph_arr == 0 && @preph_arr_mu1 == 0) {
        ## the stage is complete only if none of the three job lists had
        ## work left; this used to be a plain "else" of the preph_mu2 test
        ## and therefore did not look at the other two lists
        &mysystem ("touch $rootdir/prephase_done");
        print "prephasing done\n";
    }
}
#print "debug\n";
#exit;


############################################################
### write empty file
#############################################################

## empty_info: names of all *.empty markers of all datasets, one per line;
## written to a temporary file and moved into $rootdir when complete
unless (-e "$rootdir/empty_info") {
    die $! unless open EM, "> empty_info.tmp";


    foreach my $bimfli (@bimfli_files) {
        my $bprefix = $bimfli;
        $bprefix =~ s/.bim$//;
        my $subdir_empty = "empty_$bprefix";

        ## datasets without an empty_<dataset> directory contribute nothing
        next unless (-e $subdir_empty);

        ## the error message names the directory that failed (it used to say ".")
        opendir(IND, $subdir_empty) || die "can't opendir $subdir_empty: $!";
        my @indirempty = grep {/\.empty$/} readdir(IND);
        closedir IND;
        foreach my $marker (@indirempty) {
            print EM "$marker\n";
        }
    }
    close EM;

    &mysystem ("mv empty_info.tmp $rootdir/empty_info");
}




############################################################
### pseudo
#############################################################


## PSEUDO: trio datasets only. One job per non-empty reference chunk and
## haplotype split that has no ...haps.spli<N>.trio.haps.fini marker in
## haps_<dataset> yet.
my $pseudo_done = 0;
if (-e "$rootdir/pseudo_done") {
    $pseudo_done = 1;
}

my @pseudo_arr = ();
if ($pseudo_done == 0) {

    ## same flag for every job of this run
    my $chrX_txt  = "";
    if ($phas == 923 || $phas == 9123 || $phas == 91231) {
        $chrX_txt  = "--chrX";
    }

    ### loop for datasets
    foreach my $bimfli (@bimfli_files) {
        my $bprefix = $bimfli;
        $bprefix =~ s/.bim$//;

        my $subdir_in = "haps_$bprefix";
        my $subdir_empty = "empty_$bprefix";

        ## datasets that are not trio data are skipped
        unless (exists $trioset_bimfli{$bprefix}){
            next;
        }

        ## number of splits: individuals (from <dataset>.fam.n) divided by
        ## $spliha_n, rounded down, plus one
        my $nfam = `cat $bprefix.fam.n`;
        chomp($nfam);

        my $splitn =  int ($nfam / $spliha_n);
        $splitn++;

        ## error messages name the directory that failed (they used to say ".")
        print "read emptydir....\n";
        opendir(IND, $subdir_empty) || die "can't opendir $subdir_empty: $!";
        my @indirempty = grep {/\.empty$/} readdir(IND);
        closedir IND;

        my %indirempty_hash = map { $_ => 1 } @indirempty;

        print "read outdir....\n";
        opendir(DIR, $subdir_in) || die "can't opendir $subdir_in: $!";
        my @finifiles = grep {/\.fini$/} readdir(DIR);
        closedir DIR;

        my %finifiles_hash = map { $_ => 1 } @finifiles;

        print "start pseudo refdirloop....\n";
        #### loop for reference chunks
        foreach my $ccc (0..$#refind_arr) {

            my $refind = $refind_arr[$ccc];
            my $haps_ch_out = "$subdir_in/plink.$bprefix.$refind";
            my $empty_ch_out = "$bprefix.$refind.empty";

            if (exists $indirempty_hash{$empty_ch_out} ) {
                print "$bprefix.$refind.empty\n";
                next;
            }

            foreach my $spn (1..$splitn) {

                my $haps_file_loc = "$haps_ch_out.haps.spli$spn";
                my $haps_file_loc_sample = "$haps_ch_out.haps.spli$spn.sample";
                my $pseudo_ch_out = "$haps_ch_out.haps.spli$spn.trio";
                my $pseudo_ch_out_fini = "plink.$bprefix.$refind.haps.spli$spn.trio.haps.fini";

                unless (exists $finifiles_hash{$pseudo_ch_out_fini} ) {
                    my $sys_loc = "$pseudo_script $chrX_txt --out $pseudo_ch_out $haps_file_loc $haps_file_loc_sample";
                    push @pseudo_arr, $sys_loc;
                }
            }
        }
    }


###################################
### send pseudo jobs
###################################

    if (@pseudo_arr > 0) {

        $sjadir = $impute_dir;
        $sjaname = "pseudo";
        $sjatime = 1;
        $sjamem = 1000;
        @sjaarray = @pseudo_arr;

        &send_jobarray;

    }
    else {
        &mysystem ("touch $rootdir/pseudo_done");
        print "pseudo done\n";
    }

}


#exit;


############################################################
### impute2
#############################################################


## IMPUTE2: one job per dataset, non-empty reference chunk and haplotype
## split that has no plink.<dataset>.<chunk>.haps.spli<N>.fini marker in
## pi_<dataset> yet
my $imp2_done = 0;
if (-e "$rootdir/imp2_done") {
    $imp2_done = 1;
}

my $n_imp2_done = 0;   ## finished splits (counted together with $imp2_fini)
my @imp2_arr = ();
my $imp2_fini = 0;
if ($imp2_done == 0) {

    ### loop for datasets
    foreach my $bimfli (@bimfli_files) {
        my $bprefix = $bimfli;
        $bprefix =~ s/.bim$//;

        my $subdir = "pi_$bprefix";
        my $subdir_in = "haps_$bprefix";
        my $subdir_empty = "empty_$bprefix";

        unless (-e $subdir) {
            &mysystem ("mkdir $subdir");
        }


        my $trio_sw = 0;
        if (exists $trioset_bimfli{$bprefix}){
            $trio_sw = 1 ;
        }

        ## number of splits: individuals (from <dataset>.fam.n) divided by
        ## $spliha_n, rounded down, plus one
        my $nfam = `cat $bprefix.fam.n`;
        chomp($nfam);

        my $splitn =  int ($nfam / $spliha_n);
        $splitn++;

        ## error messages name the directory that failed (they used to say ".")
        print "read emptydir....\n";
        opendir(IND, $subdir_empty) || die "can't opendir $subdir_empty: $!";
        my @indirempty = grep {/\.empty$/} readdir(IND);
        closedir IND;

        my %indirempty_hash = map { $_ => 1 } @indirempty;

        print "read outdir....\n";
        opendir(DIR, $subdir) || die "can't opendir $subdir: $!";
        my @finifiles = grep {/\.fini$/} readdir(DIR);
        closedir DIR;

        my %finifiles_hash = map { $_ => 1 } @finifiles;

        print "start imp2 refdirloop....\n";
        #### loop for reference chunks
        foreach my $ccc (0..$#refind_arr) {

            my $refind = $refind_arr[$ccc];
            my $refstart = $refstart_arr[$ccc];
            my $refend = $refend_arr[$ccc];
            my $rf = $reffiles[$ccc];
            my $gema_file = $gema_arr[$ccc];
            my $haps_ch_out = "$subdir_in/plink.$bprefix.$refind";
            my $empty_ch_out = "$bprefix.$refind.empty";
            my $refchunk = "$refdir/$rf";

            if (exists $indirempty_hash{$empty_ch_out} ) {
                print "$bprefix.$refind.empty\n";
                next;
            }

            foreach my $spn (1..$splitn) {

                ## trio datasets are imputed from the .trio.haps / .trio.sample
                ## files instead of the plain split
                my $haps_file_loc = "$haps_ch_out.haps.spli$spn";
                my $haps_file_loc_sample = "$haps_ch_out.haps.spli$spn.sample";

                if ($trio_sw) {
                    $haps_file_loc = "$haps_ch_out.haps.spli$spn.trio.haps";
                    $haps_file_loc_sample = "$haps_ch_out.haps.spli$spn.trio.sample";
                }

                my $pi_ch_out = "$subdir/plink.$bprefix.$refind.haps.spli$spn";
                my $pi_ch_out_fini = "plink.$bprefix.$refind.haps.spli$spn.fini";

                ## for these $phas values the sample file is passed along with --chrX
                my $xtxt = "";

                if ($phas == 923 || $phas == 9123 || $phas == 91231) {
                    $xtxt = "--chrX $haps_file_loc_sample";
                }

                unless (exists $finifiles_hash{$pi_ch_out_fini} ) {
                    my $sys_loc = "$imp2_script $xtxt --out $pi_ch_out --in $haps_file_loc --refstart $refstart --refend $refend  --reffile $refchunk  --gema $gema_file";
                    push @imp2_arr, $sys_loc;
                }
                else {
                    $imp2_fini++;
                    $n_imp2_done++;
                }
            }
        }
    }


###################################
### send impute2 jobs
###################################

    if (@imp2_arr > 0) {

        $sjadir = $impute_dir;
        $sjaname = "imp2";
        $sjatime = 2;
        $sjamem = 8000;
        ## more memory is requested as soon as at least one split is finished
        if ($imp2_fini > 0) {
            $sjamem = 14000;
        }
        @sjaarray = @imp2_arr;

        &send_jobarray;

    }
    else {
        &mysystem ("touch $rootdir/imp2_done");
        print "imp2 done\n";
    }

}



#exit;

############################################################
### dosing
#############################################################


## DOSING: one job per dataset and non-empty reference chunk that has no
## dos_<dataset>.<chunk>.out.dosage.fini marker in $rootdir/dasu_<dataset> yet;
## each job receives the output files of all haplotype splits of its chunk
my $dos_done = 0;
if (-e "$rootdir/dos_done") {
    $dos_done = 1;
}



## common prefix of the per-dataset output directories
my $dasudir = "$rootdir/dasu";

my @dos_arr = ();
my $dos_fini = 0;   ## number of dataset/chunk pairs already finished
if ($dos_done == 0) {
    ### loop for datasets
    foreach my $bimfli (@bimfli_files) {
        my $bprefix = $bimfli;
        $bprefix =~ s/.bim$//;

        my $dasudir_loc = "$dasudir"."_$bprefix";
        unless (-e $dasudir_loc) {
            &mysystem("mkdir $dasudir_loc");
        }

        my $subdir_in = "pi_$bprefix";
        my $subdir_empty = "empty_$bprefix";


        my $trio_sw = 0;
        my $trio_prefix = "";
        if (exists $trioset_bimfli{$bprefix}){
            $trio_sw = 1 ;

            ## read out the prefix: everything before the first "*" on the
            ## first line of the fam file
            die "$!: $bprefix.fam" unless open FILE, "< $bprefix.fam";
            my $line = <FILE>;
            my @cells = split '\*', $line;
            close FILE;

            $trio_prefix = $cells[0];
        }

        ## number of splits: individuals (from <dataset>.fam.n) divided by
        ## $spliha_n, rounded down, plus one
        my $nfam = `cat $bprefix.fam.n`;
        chomp($nfam);

        my $splitn =  int ($nfam / $spliha_n);
        $splitn++;

        ## error messages name the directory that failed (they used to say ".")
        print "read emptydir....\n";
        opendir(IND, $subdir_empty) || die "can't opendir $subdir_empty: $!";
        my @indirempty = grep {/\.empty$/} readdir(IND);
        closedir IND;

        my %indirempty_hash = map { $_ => 1 } @indirempty;

        print "read outdir....\n";
        opendir(DIR, $dasudir_loc) || die "can't opendir $dasudir_loc: $!";
        my @finifiles = grep {/\.out.dosage.fini$/} readdir(DIR);
        closedir DIR;

        my %finifiles_hash = map { $_ => 1 } @finifiles;

        print "start dos refdirloop....\n";
        #### loop for reference chunks
        foreach my $ccc (0..$#refind_arr) {

            my $refind = $refind_arr[$ccc];
            my $refchr = $refchr_arr[$ccc];
            my $empty_ch_out = "$bprefix.$refind.empty";

            my $dos_ch_out = "dos_$bprefix.$refind";
            my $dos_ch_out_fini = "dos_$bprefix.$refind.out.dosage.fini";

            my $plink_ch_out = "subbfile_$bprefix/plink.$bprefix.$refind";
            my $famname_loc = "$plink_ch_out.fam";

            ## trio datasets take the fam file of the first trio split and
            ## pass the prefix read above
            my $prefix_loc = "";
            if ($trio_sw == 1){
                $famname_loc = "haps_$bprefix/plink.$bprefix.$refind.haps.spli1.trio.fam";
                $prefix_loc = "--prefix $trio_prefix";
            }


            if (exists $indirempty_hash{$empty_ch_out} ) {
                print "$bprefix.$refind.empty\n";
                next;
            }

            ## space-separated list of the split outputs, each preceded by a space
            my $puter_out_arr = "";
            foreach my $spn (1..$splitn) {
                my $pi_ch_out = "$subdir_in/plink.$bprefix.$refind.haps.spli$spn.gz";
                $puter_out_arr .= " $pi_ch_out";
            }

            unless (exists $finifiles_hash{$dos_ch_out_fini} ) {
                my $sys_loc = "$dos_script $prefix_loc --outname $dos_ch_out --outdir $dasudir_loc  --chr $refchr --fam $famname_loc --bim $plink_ch_out.bim $puter_out_arr";
                push @dos_arr, $sys_loc;
            }
            else {
                $dos_fini++;
            }
        }
    }


###################################
### send dos jobs
###################################

    if (@dos_arr > 0) {

        $sjadir = $impute_dir;
        $sjaname = "dos";
        $sjatime = 2;
        $sjamem = 2000;
        $sjamaxpar = 100;
        @sjaarray = @dos_arr;

        &send_jobarray;

    }
    else {
        &mysystem ("touch $rootdir/dos_done");
        print "dos done\n";
    }

}


#print "debug after dos\n";
#exit;

############################################################
### best guess
#############################################################


# Best-guess stage ("dabg").
#
# For every dataset in @bimfli_files and every reference chunk in @refind_arr,
# queue one $dabg_script command unless
#   * the chunk is flagged empty (a "<prefix>.<chunk>.empty" file in empty_<prefix>), or
#   * its "dos_<prefix>.<chunk>.out.dosage.gz.fini" marker already exists in
#     <dasudir>qc1_<prefix>/qc1.
# When nothing is left to queue, the stage marker "$rootdir/dabg_done" is touched.
# Once that marker exists the whole stage is skipped.
my $dabg_done = (-e "$rootdir/dabg_done") ? 1 : 0;

my @dabg_arr = ();   # commands still to be run
my $dabg_fini = 0;   # number of chunks whose .fini marker was found
if ($dabg_done == 0) {
    ### loop for datasets
    foreach my $bimfli (@bimfli_files) {
        my $bprefix = $bimfli;
        # NOTE: the dot is unescaped (matches any character). It is left as is
        # so that this stage derives the same prefix as the other dataset
        # loops in this file, which use the identical pattern.
        $bprefix =~ s/.bim$//;

        my $dasudir_loc     = "$dasudir"."_$bprefix";
        my $dasudir_loc_qc1 = "$dasudir"."qc1_$bprefix";
        unless (-e "$dasudir_loc_qc1/qc1") {
            &mysystem("mkdir -p $dasudir_loc_qc1/qc1");
        }

        my $subdir_empty = "empty_$bprefix";

        # Markers for chunks that are known to be empty.
        print "read emptydir....\n";
        opendir(my $empty_dh, $subdir_empty)
            or die "can't opendir $subdir_empty: $!";
        my %indirempty_hash = map { $_ => 1 } grep { /\.empty$/ } readdir($empty_dh);
        closedir $empty_dh;

        # Markers for chunks that already went through this stage.
        print "read outdir....\n";
        opendir(my $qc1_dh, "$dasudir_loc_qc1/qc1")
            or die "can't opendir $dasudir_loc_qc1/qc1: $!";
        my %finifiles_hash =
            map { $_ => 1 } grep { /\.out\.dosage\.gz\.fini$/ } readdir($qc1_dh);
        closedir $qc1_dh;

        #### loop for reference chunks
        foreach my $refind (@refind_arr) {

            my $dabgname     = "$bprefix.$refind";
            my $dasuqc1_fini = "dos_$bprefix.$refind.out.dosage.gz.fini";
            my $empty_ch_out = "$bprefix.$refind.empty";

            if (exists $indirempty_hash{$empty_ch_out}) {
                # empty chunk: only report it
                print "$bprefix.$refind.empty\n";
                next;
            }

            if (exists $finifiles_hash{$dasuqc1_fini}) {
                $dabg_fini++;
                next;
            }

            my $options_tmp = "--info_th $info_th --freq_th $freq_th --bg_th $bg_th";
            my $sys_loc = "$dabg_script $options_tmp --indir $dasudir_loc --outdir $dasudir_loc_qc1 $dabgname";
            push @dabg_arr, $sys_loc;
        }
    }

###################################
### send dabg jobs
###################################

    if (@dabg_arr > 0) {

        # Parameters read by send_jobarray (defined elsewhere in this file).
        $sjadir    = $impute_dir;
        $sjaname   = "dabg";
        $sjatime   = 2;
        $sjamem    = 1000;
        $sjamaxpar = 100;
        @sjaarray  = @dabg_arr;

        &send_jobarray;

    }
    else {
        &mysystem ("touch $rootdir/dabg_done");
        print "dabg done\n";
    }
}




##################################################################################################################################
##################################################################################################################################
##################################################################################################################################


# No aligned dataset list available: derive one from @bim_files instead.
# Each @bim_files entry is modified in place (its ".bim" ending is removed)
# and the resulting name plus ".hg19.ch.fl" is appended to @bimfli_files.
if (@bimfli_files == 0 && @bim_files > 0) {
    foreach my $bim_name (@bim_files) {
        $bim_name =~ s/.bim$//;     # loop variable aliases the array element
        push @bimfli_files, $bim_name.".hg19.ch.fl";
    }
}

# Still nothing to work on: report and stop.
unless (@bimfli_files != 0) {
    print "Error: no datasets\n";
    exit;
}

#print @reffiles.": reffiles\n";
#print "debug\n";
#sleep (10);

#####################################
## combine best guess genotypes
#####################################


# Directory of the per-chunk "combine best guess" stage. The variable is
# still read by the cleanup stage further down, so it stays at file scope.
my $cobg_dir = "$rootdir/cobg_dir_$outname";

# DISABLED (if (0)): per-chunk combine / prune / merge of best-guess
# genotypes. The genome-wide stage below is what actually runs.
# Kept for reference; the logic is unchanged apart from
#   * "$1" instead of the sed-style "\1" in the substitution replacement,
#   * a lexical, checked filehandle for MERGE_list,
#   * removal of an unused counter.
if (0) {

    unless (-e $cobg_dir) {
        &mysystem ("mkdir -p $cobg_dir");
    }
    print "start cobg\n";
    my @cobg_arr;    # combine commands, one per reference chunk
    my @prune_arr;   # prune commands for chunks that are already combined
    my @merge_arr;   # pruned bfile prefixes that are ready to be merged
    unless (-e "$rootdir/cobg_done") {

        foreach my $rf (@reffiles) {

            # Reduce the reference file name to its chunk id
            # (chrN_start_end if present, otherwise chrN_x).
            my $refind = $rf;
            if ($refind =~ /chr[0-9]*_[0-9]*_[0-9]*/) {
                $refind =~ s/.*(chr[0-9]*_[0-9]*_[0-9]*).*/$1/;
            }
            else {
                $refind =~ s/.*(chr[0-9]*_[0-9]*).*/$1/;
            }

            my $cobg_fini = "$cobg_dir/cobg.$outname.$refind.fini";

            unless (-e $cobg_fini) {

                # Chunk not combined yet: collect the qc2 bfile of every dataset.
                my $in = 1;          # stays 1 only if every dataset has its bfile
                my $in_list = "";
                foreach my $bimfli (@bimfli_files) {
                    my $bprefix = $bimfli;
                    $bprefix =~ s/.bim$//;

                    my $dasudir_loc_qc1 = "$dasudir"."qc1_$bprefix";
                    my $bfile_loc = "$dasudir_loc_qc1/bgs/dos_$bprefix.$refind.out.dosage.gz.qc2";
                    my $bfile_check = "$bfile_loc.bim";
                    if (-e $bfile_check) {
                        $in_list .= " $bfile_loc";
                    }
                    else {
                        print " $bfile_loc has no $bfile_check\n";
                        $in = 0;
                    }
                }

                my $sys_loc = "$cobg_script --out cobg.$outname.$refind $in_list";

                # Only queue the job when every dataset contributed a bfile.
                if ($in_list ne "" && $in == 1) {
                    push @cobg_arr, $sys_loc;
                }
            }
            else {
                my $prune_fini = "$cobg_dir/prune.cobg.$outname.$refind.fini";
                my $prune_out = "$cobg_dir/prune.cobg.$outname.$refind.out";
                if (-e $prune_fini) {
                    # Pruned already: candidate for the final merge.
                    my $bfile_loc = "prune.bfile.cobg.$outname.$refind";
                    if (-e "$cobg_dir/$bfile_loc.bim") {
                        unless (-e "$cobg_dir/prune.bfile.cobg.$outname.fini") {
                            push @merge_arr, $bfile_loc;
                        }
                    }
                }
                elsif (!(-e $prune_out)) {
                    # Combined but not pruned and no prune output present yet.
                    if (-e "$cobg_dir/cobg.$outname.$refind.bim") {
                        push @prune_arr, "$prune_script cobg.$outname.$refind";
                    }
                }
            }
        }
    }

    chdir ($impute_dir);

    ###################################
    ## combine the best-guess chunks
    ###################################

    if (@cobg_arr > 0) {

        $sjadir = $cobg_dir;
        $sjaname = "cobg";
        $sjatime = 2;
        $sjamem = 1000;
        @sjaarray = @cobg_arr;

        &send_jobarray;

    }
    else {
        print "cobg done\n";
    }

    ###################################
    ## prune best-guess chunks
    ###################################

    if (@prune_arr > 0) {

        $sjadir = $cobg_dir;
        $sjaname = "prune";
        $sjatime = 2;
        $sjamem = 1000;
        @sjaarray = @prune_arr;

        &send_jobarray;

    }
    else {
        print "prune_bg done\n";
    }

    ###################################
    ## merge pruned best-guess chunks
    ###################################

    if (@merge_arr > 0) {

        chdir ("$cobg_dir");

        # The first bfile is handed to the merge script directly, all
        # remaining ones are listed (bed bim fam per line) in MERGE_list.
        my $fbfile = shift(@merge_arr);
        open(my $merge_fh, ">", "MERGE_list")
            or die "can't write MERGE_list: $!";
        foreach my $bf (@merge_arr) {
            print {$merge_fh} "$bf.bed $bf.bim $bf.fam\n";
        }
        close($merge_fh)
            or die "can't close MERGE_list: $!";

        my @merge_job;
        push @merge_job, "$merge_script prune.bfile.cobg.$outname $fbfile MERGE_list";

        $sjadir = $cobg_dir;
        $sjaname = "merge";
        $sjatime = 2;
        $sjamem = 3000;
        @sjaarray = @merge_job;

        &send_jobarray;

    }
    else {
        &mysystem ("touch $rootdir/cobg_done");
        print "merge pruned combined done\n";
    }

}



####################################################################################################
#### combine genome wide
####################################################################################


#####################################
## combine best guess genotypes
#####################################

# Genome-wide combine of the best-guess genotypes.
#
# For each dataset three result sets are combined, taken from the
# subdirectories bgn, bg and bgs of <dasudir>qc1_<prefix>. A set is skipped
# when its "<prefix>.<set>.fini" marker already exists in $cobg_gw_dir.
# With nothing left to do, "$rootdir/cobg_gw_done" is touched.
my $cobg_gw_dir = "$rootdir/cobg_dir_genome_wide";
&mysystem ("mkdir -p $cobg_gw_dir") unless (-e $cobg_gw_dir);

print "start cobg genome wide\n";
my @cobg_gw_arr;          # combine commands still to be run
my $cobg_gw_fini = 0;     # number of sets already finished

unless (-e "$rootdir/cobg_gw_done") {
    foreach my $bimfli (@bimfli_files) {
        my $bprefix = $bimfli;
        $bprefix =~ s/.bim$//;

        # Same order as before: bgn, then bg, then bgs.
        foreach my $bgtype ("bgn", "bg", "bgs") {
            if (-e "$cobg_gw_dir/$bprefix.$bgtype.fini") {
                $cobg_gw_fini++;
            }
            else {
                my $dasudir_loc_bg = "$dasudir"."qc1_$bprefix/$bgtype";
                push @cobg_gw_arr, "$cobg_gw_script --out $bprefix.$bgtype $dasudir_loc_bg";
            }
        }
    }
}


chdir ($impute_dir);

###################################
## combine the best-guess sets
###################################

if (@cobg_gw_arr == 0) {
    &mysystem ("touch $rootdir/cobg_gw_done");

    print "cobg done\n";
}
else {

    # Parameters read by send_jobarray (defined elsewhere in this file).
    @sjaarray = @cobg_gw_arr;
    $sjaname  = "cobg_gw";
    $sjadir   = $cobg_gw_dir;
    $sjatime  = 2;
    $sjamem   = 12000;

    &send_jobarray;

}














###################################
## start pcaer with cobg
###################################
# Prepare the pcaer directory (only if README.pcaer is not there yet):
# link the genome-wide bgs bim/bed/fam files of every dataset into
# $pcaer_dir and write the suggested pcaer command line to README.pcaer.
my $cobg_out = "$pcaer_dir/README.pcaer";
if (!(-e $cobg_out)) {

    chdir ($pcaer_dir);

    my @bgs_names;
    foreach my $bimfli (@bimfli_files) {
        my $bprefix = $bimfli;
        $bprefix =~ s/.bim$//;

        # one symlink per plink file, in the order bim, bed, fam
        foreach my $ext (qw(bim bed fam)) {
            &mysystem ("ln -fs  $cobg_gw_dir/$bprefix.bgs.$ext");
        }
        push @bgs_names, "$bprefix.bgs";
    }

    # every name is preceded by a single blank, exactly as when appended one by one
    my $blist = join("", map { " $_" } @bgs_names);

    my $pcaer_sys = "pcaer_[VERSION] --noproject --prefercase --preferfam --out cobg_gw.$outname $blist";
    &mysystem ("echo $pcaer_sys > README.pcaer");

    chdir ($rootdir);

}




#print "debug\n";
#sleep(10);


# Cleanup stage. Skipped entirely (script ends here) when $noclean is set.
if ($noclean) {
    print "exit before cleanup\n";
    exit;
}


chdir ($rootdir);


print "start cleaning\n";
my @clean_arr;    # cleanup commands still to be run

# Nothing is collected once "$rootdir/clean_done" exists.
if (!(-e "$rootdir/clean_done")) {

    foreach my $bimfli (@bimfli_files) {
        my $bprefix = $bimfli;
        $bprefix =~ s/.bim$//;

        # Per-dataset working directories, in the order they are queued.
        my @work_dirs = (
            "$dasudir"."_$bprefix",
            "$dasudir"."qc1_$bprefix/qc1f",
            "$rootdir/$impute_dir"."/pi_$bprefix",
            "$rootdir/$impute_dir"."/subbfile_$bprefix",
            "$rootdir/$impute_dir"."/haps_$bprefix",
        );

        # Queue a directory only if it exists and has no "cleaned" marker yet.
        foreach my $work_dir (@work_dirs) {
            next unless (-e "$work_dir");
            next if (-e "$work_dir/cleaned");
            push @clean_arr,  "$cleandir_script $work_dir";
        }
    }


    # errandout directory of the imputation directory
    my $pi_eo_loc = "$rootdir/$impute_dir"."/errandout";
    if ((-e "$pi_eo_loc") && !(-e "$pi_eo_loc/cleaned")) {
        push @clean_arr,  "$cleanerrandout_script $pi_eo_loc";
    }


    # cobg directory: first its errandout subdirectory, then the directory itself
    if (-e "$cobg_dir") {
        my $cobg_eo_loc = "$cobg_dir/errandout";
        if ((-e "$cobg_eo_loc") && !(-e "$cobg_eo_loc/cleaned")) {
            push @clean_arr,  "$cleanerrandout_script $cobg_eo_loc";
        }
        unless (-e "$cobg_dir/cleaned") {
            push @clean_arr,  "$cleandir_script --cobg $cobg_dir";
        }
    }


###################################
### send clean jobs
###################################

    if (@clean_arr == 0) {
        &mysystem ("touch $rootdir/clean_done");
        print "cleaning done\n";
    }
    else {

        # Parameters read by send_jobarray (defined elsewhere in this file).
        @sjaarray  = @clean_arr;
        $sjaname   = "clean";
        $sjadir    = $rootdir;
        $sjatime   = 2;
        $sjamem    = 1000;
        $sjamaxpar = 100;

        &send_jobarray;

    }

}






########################################################
## du at end
#################################################



# Disk-usage report: queue $du_script once, unless "du.fini" already
# exists in the current directory (which is $rootdir after the chdir).
chdir ($rootdir);


print "start du\n";
my @du_arr = (-e "du.fini") ? () : ("$du_script");


###################################
### send du job
###################################

unless (@du_arr == 0) {

    # Parameters read by send_jobarray (defined elsewhere in this file).
    @sjaarray = @du_arr;
    $sjaname  = "du";
    $sjadir   = $rootdir;
    $sjatime  = 2;
    $sjamem   = 1000;

    &send_jobarray;

}






###############################################
### mv the blueprint files to a safe place
###############################################
# Move every "blueprint_jobs__*" entry of @files into the backup directory
# (created on demand). An existing backup is never overwritten: ".c" is
# appended to the target name until it is unused.
my $blueprint_sich_dir = "blueprint_bak";

if (!(-e $blueprint_sich_dir)) {
    print "$blueprint_sich_dir is not existing, create one for you\n";
    my @made_dirs = mkpath($blueprint_sich_dir, {verbose => 0, mode => 0750});
}


foreach my $floc (@files) {

    # only the blueprint job files are of interest
    next unless ($floc =~ /^blueprint_jobs__/);

    print "mv $floc\n";

    # find a free name in the backup directory
    my $target = $floc;
    $target .= ".c" while (-e "$blueprint_sich_dir/$target");

    &mysystem ("mv $floc $blueprint_sich_dir/$target");
}


#################################################################
## print meta file
#################################################################

# Append the options of this run, with a timestamp, to "$outname.meta".
# Each record is "name<TAB>value"; unset optional files are written as "NA".
# Dies if the file cannot be opened or if the final close reports an error.
{
    ### print options with timestamp

    my $now = localtime time;   # scalar context: human readable date string
    $now =~ s/ /_/g;            # no blanks inside the header field

    # Three-argument open with a lexical handle: the mode can never be
    # taken from the content of $outname.
    open(my $meta_fh, ">>", "$outname.meta")
        or die "$!: $outname.meta";

    my $refiexfile_meta  = $refiex_file  ? $refiex_file  : "NA";
    my $triosetfile_meta = $trioset_file ? $trioset_file : "NA";

    # name/value pairs in output order
    my @meta_rows = (
        ["variable(see_also_help)", "value"],
        ["reference_directory",     "$refdir"],
        ["reference_snp_info",      "$refdir/$suminfo_s.chrXXX.gz"],
        ["reference_batch_info",    "$refdir/$suminfo_n"],
        ["popname",                 "$popname"],
        ["sfh",                     "$sec_freq"],
        ["fth",                     "$fth_th"],
        ["info_th",                 "$info_th"],
        ["freq_th",                 "$freq_th"],
        ["bg_th",                   "$bg_th"],
        ["bg_miss_th(hardcoded)",   "0.02"],
        ["bgs_maf_th(hardcoded)",   "0.05"],
        ["bgs_miss_th(hardcoded)",  "0.01"],
        ["bgn_th(hardcoded)",       "no_filter_on_maf_and_miss_compared_to_dosage"],
        ["spliha_n",                "$spliha_n"],
        ["refiex_file",             "$refiexfile_meta"],
        ["trioset_file",            "$triosetfile_meta"],
        ["plink",                   "$ploc"],
        ["impute2",                 "$i2loc"],
        ["liftover",                "$liloc"],
        ["logfiles",                "$loloc"],
    );

    print {$meta_fh} "----------------------\t-----($now)----------\n";
    foreach my $row (@meta_rows) {
        print {$meta_fh} "$row->[0]\t$row->[1]\n";
    }

    # Buffered write errors only show up here.
    close($meta_fh)
        or die "$!: $outname.meta";
}

#print "@merge_arr\n";
#print "debug\n";
#exit;
#############################################################
## SUCCESSSS
#############################################################

# Final step: hand a job named "finished" to send_jobarray (defined
# elsewhere in this file). The placeholder entry "tmp" is appended to
# whatever @sjaarray currently holds.
push @sjaarray, "tmp";
$sjaname = "finished";
$sjadir  = $rootdir;
$sjamem  = 1000;
$sjatime = 2;

&send_jobarray;
