#!/usr/bin/env perl
use strict;


### on LISA: sjamem up to 2000 means with 16 cores the 32 GB machines
### if sjamem more than 2000 -> 64 GB nodes and reduced number of cores

########################################
#
#  version 24: 
#  
#  chunk results and then meta-chunk
#  area_plot_speed
#  new manhattan plot
#  new areator
#  chrX
#  new gwascatalog
#  exclude refind: cat pi_sub/prephase_job_list_multi1 | awk '{print $3}' | sed 's/.*chr/chr/' | sed 's/.bed//' > refind.ex
#
#  version 32:
#   checkflip with fth
#
#  version 46:
#   total cleanup, --phase cleaned, 
#
#  version 47:
#   reference alignment per chromosome (readref) for improved efficiency, some improvement in variance translation
#   refdir directly
#   meta-file for options
#
#
#
#  version 49:
#   maxpar for high IO jobs (changed blueprint as well)
#
#  version 50:
#   zipped impute2-output (-o_gz)
#   du_out at end
#
# version 51:
#   compatibility with LISA
#
# version 52:
#   comp_dir for best guess
#
# version 53:
#   replaced cobg_chunks with cobg_gw, changed pcaer as well.
#
# version 54:
#   fixed bad error checking for dosager and dabg
#   fixed lost individuals with missing phenotype
#
# version 55:
#   serial
#
# version 56
#   probability check (patch from raymond)
#   UGER at broad
#
#######################################




### working on 35, stopped at after chucker........
### get the stuff at the end of onlymeta

### look for here working (2 spots)

## 37 with checkpos3 (also based on position)
## 38 with checkpos5 (PsychChip snp names)
## 39 with cleanup (not finished yet)
## 40 with buildguessing and liftover, also testing for binaries
## 41 error message and success message, cleaning finished
## 42 with my.imp.2 (including best guess haplotypes) 
##     change bcomb_3 into bcomp_3_p2
## 43 and 44: bug fixes
## 45 increase mem
## 46 cleanup, week jobs
## 47 read out reference per week.

#############################
# load utility functions
#############################

use FindBin;
use lib "$FindBin::Bin";
use Ricopili::Utils qw(trans);
use Ricopili::Version;




#my $version = "1.0.30";
# Script name without any leading directory part, and the full command line
# as typed.  $command_line is what gets handed back to the job scheduler when
# the pipeline re-invokes itself, so it is captured before option parsing.
my $progname = $0;
$progname =~ s!^.*/!!;
my $command_line = "$progname @ARGV";



my $jnum = 7; ### number of imputation job per node

my $spliha_n = 1500000; ## split haplotypes with N individuals

my $best_lahunt = 5;

# -1 is the "no --phase given" sentinel checked during argument validation.
my $phas = -1;




my $info_txt = "";
#my $homedir = "/home/gwas";
# Filled with the current working directory once the main part starts.
my $rootdir = "";

my $iname = "" ;


# File names derived from the common "infosum_pos" stem.
my $suminfo = "infosum_pos";
my $suminfo_n = "$suminfo.nsnps";
my $suminfo_c = "$suminfo.chunks";
my $suminfo_r = "$suminfo.reffiles";
#my $suminfo_s = "$suminfo.sorted";
my $suminfo_s = "NAN";

# Passed as --njob to the blueprint script when the pipeline resubmits itself.
my $job_bn_th = 1000;

# Reference directories indexed by the numeric --phase code.
my @ref_coll = ();

# Default for --chunk; later appended to $suminfo_c.
my $chunkind = 10;


#my $hapmap_ref_root = "/home/gwas/pgc-samples/hapmap_ref/";


# Default for --fth.
my $fth_th = 0.15;


use Sys::Hostname;
my $host = hostname;

# Serial mode flag; switched on by the config file or by --serial.
my $serial = 0;

#my $broad = 1 if ($ENV{"DOMAINNAME"} =~ /broadinstitute/);


#############################
# read config file
#############################

# Look up site-specific settings by key through Ricopili::Utils::trans
# (presumably backed by the user's ricopili configuration file -- confirm).
my $ploc = &trans("p2loc");
my $shloc = &trans("shloc"); # shapeit
my $hmloc = &trans("hmloc");     # prefix of all built-in reference directories
#my $homedir = &trans("home");
my $qloc = &trans("queue");      # queue type, compared against "bsub", "qsub", "qsub_b"
my $i2loc = &trans("i2loc");
my $liloc = &trans("liloc");
my $email = &trans("email");     # recipient of success / error mails
my $loloc = &trans("loloc");     # directory holding the impute_dir_info log
my $bcmd = &trans("batch_jobcommand");

###############################################

#if ($broad) {
#    $hmloc = "/home/radon01/sripke/bakker_ripke/hapmap_ref/";
#    $homedir = "/home/radon01/sripke/";
#}

# A batch_jobcommand of "SERIAL" forces serial mode regardless of --serial.
if ($bcmd eq "SERIAL") {
    $serial = 1;
    print "-----------------------------------------------------\n";
    print "switched on SERIAL mode because of configuration file\n";
}



# Built-in reference directories, indexed by the numeric --phase code and
# rooted at $hmloc.  The same indices are listed in $phase_txt.
# NOTE(review): index 515 is assigned twice below (".../subchr/hla" and then
# ".../subchr/chr8_9"); the second assignment wins, so the HLA entry is
# unreachable.  One of the two presumably needs a different code -- confirm.
# NOTE(review): @ref_coll is a plain array, so the largest index (882223)
# makes Perl extend the array to that length.
$ref_coll[5] = "$hmloc"."1KG/phased/subchr" ;
$ref_coll[6] = "$hmloc"."1KG_june10/hapmap3_r2_plus_1000g_jun2010_b36_ceu/bgl/subchr_5" ;
$ref_coll[7] = "$hmloc"."1KG_aug10/subchr" ;
$ref_coll[8] = "$hmloc"."1KG_aug10_nodup/subchr" ;
$ref_coll[8883] = "$hmloc"."1KG_aug10_nodup/mhc_window" ;
$ref_coll[8882] = "$hmloc"."mars_window/1KG" ;

$ref_coll[88] = "$hmloc"."1KG_phas1_umich/ref_0611/subchr" ;
$ref_coll[881] = "$hmloc"."1KG_phas1_umich/ref_0611_eur/subchr" ;
$ref_coll[882] = "$hmloc"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr" ;
$ref_coll[8821] = "$hmloc"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr/test2" ;
$ref_coll[8822] = "$hmloc"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr/NOD2" ;
$ref_coll[88222] = "$hmloc"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr/best_basam" ;
$ref_coll[882222] = "$hmloc"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr/chr3_itih" ;
$ref_coll[882223] = "$hmloc"."1KG_phas1_umich/ref_0611_eur_3Mb/subchr/chr19_ncan" ;

$ref_coll[9999] = "$hmloc"."impute2_ref/1KG_Mar12/ALL_1000G_phase1integrated_feb2012_impute/test/subchr" ;
$ref_coll[9] = "$hmloc"."impute2_ref/1KG_Mar12/ALL_1000G_phase1integrated_feb2012_impute/subchr" ;
$ref_coll[91] = "$hmloc"."impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/subchr" ;
$ref_coll[9111] = "$hmloc"."impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/subchr/test" ;
$ref_coll[9123] = "$hmloc"."impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/chr23/subchr" ;
$ref_coll[91231] = "$hmloc"."impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/chr23/subchr/test" ;


$ref_coll[923] = "$hmloc"."impute2_ref/1KG_Mar12/ALL_1000G_phase1integrated_feb2012_impute/chr23/subchr" ;
$ref_coll[9231] = "$hmloc"."impute2_ref/1KG_Mar12/ALL_1000G_phase1integrated_feb2012_impute/chr23_pseudo/subchr" ;


$ref_coll[555] = "$hmloc"."1KG/phased/subchr/test";
$ref_coll[3] = "$hmloc"."subchr";
$ref_coll[323] = "$hmloc"."subchr/23";
$ref_coll[333] = "$hmloc"."subchr/test" ;

$ref_coll[334] = "$hmloc"."subchr/HLA" ;
$ref_coll[335] = "$hmloc"."subchr/chr2_test" ;
$ref_coll[3333] = "$hmloc"."subchr/test/local" ;
$ref_coll[3331] = "$hmloc"."subchr/SDCCA" ;
$ref_coll[3332] = "$hmloc"."mars_window" ;
$ref_coll[39] = "$hmloc"."impute2_ref/HM3/hapmap3_r2_b36/subchr" ;
$ref_coll[399] = "$hmloc"."impute2_ref/HM3/hapmap3_r2_b36/subchr.3mb" ;

$ref_coll[15] ="$hmloc"."hla_t1d/subchr";
$ref_coll[152] ="$hmloc"."impute2_ref/HLA_0813/orig/subchr";
$ref_coll[1521] ="$hmloc"."impute2_ref/HLA_0813/orig/hg19";
$ref_coll[1522] ="$hmloc"."mhc";
$ref_coll[515] = "$hmloc"."1KG/phased/subchr/hla" ;
$ref_coll[511] = "$hmloc"."1KG/phased/subchr/mir" ;
$ref_coll[512] = "$hmloc"."1KG/phased/subchr/mir708" ;
$ref_coll[513] = "$hmloc"."1KG/phased/subchr/HBII-108" ;

$ref_coll[514] = "$hmloc"."1KG/phased/subchr/chr10_11" ;
$ref_coll[515] = "$hmloc"."1KG/phased/subchr/chr8_9" ;
$ref_coll[516] = "$hmloc"."1KG/phased/subchr/cacna1c" ;
$ref_coll[517] = "$hmloc"."1KG/phased/subchr/csmd1" ;
$ref_coll[518] = "$hmloc"."1KG/phased/subchr/tcf4" ;
$ref_coll[519] = "$hmloc"."1KG/phased/subchr/chr2_20" ;
$ref_coll[520] = "$hmloc"."1KG/phased/subchr/top_scz" ;
$ref_coll[521] = "$hmloc"."1KG/phased/subchr/chr11_13" ;

$ref_coll[5555] ="$hmloc"."impute2_ref/lboettger/chr16";

$ref_coll[4] = "$hmloc"."CNV/subchr";
$ref_coll[444] = "$hmloc"."CNV/subchr_test";

$ref_coll[2] = "$hmloc"."phas2/subchr/outdir";
$ref_coll[222] = "$hmloc"."phas2/subchr/outdir/chr12_1";
$ref_coll[311] = "$hmloc"."hm3_ww/subchr";
$ref_coll[322] = "$hmloc"."finref/fineur/refdan/subchr";


# Defaults for --info_th, --freq_th and --bg_th.
my $info_th = 0.1;
my $freq_th = 0.005;
my $bg_th = 0.8;

# Default for --sjamem_incr; added to every job's memory request.
my $sjamem_incr = 0;

my $multithread1 = 4;
my $multithread2 = 8;
my $multithread_v2 = 0;   # default for --multi
# The "qsub_b" queue type gets a single thread for $multithread1.
if ($qloc eq "qsub_b"){
    $multithread1 = 1;
}
my $impwallinc=0;         # default for --impwallinc
my $sepa = 1;             # default for --sepa (parallel jobs in serial mode)

#exit;

my $sec_freq = .2;  ## secure freq distance around 50%






my $popname = "eur";      # default for --popname
my $buffer = 500;         # default for --buffer

##### help message
# Help text, shown when --help is given or a mandatory argument is missing.
# The string is built once here, so the interpolated defaults ($sec_freq,
# $fth_th, $buffer, $info_th, $freq_th, $bg_th, $chunkind) carry the values
# they have at this point, i.e. before the command line is parsed.
# $rp_version is not declared in this file (presumably exported by
# Ricopili::Version -- confirm).
# NOTE(review): the --bg_th line repeats the --freq_th description and the
# --impute2 line mentions impute4; both look like copy/paste slips, and the
# --multi line has an unbalanced parenthesis.  Confirm intended wording
# before touching the text, since it is user-facing output.
my $usage = "
Usage : $progname [options] --phase PHASE --outname OUTNAME

version: $rp_version


 --help            print this text and exits

 --phase INT       impute with HM - Phase INT as ref., no default; 
                       (mandatory if --refdir is not specified)

 --refdir STRING   full path of reference directory, overwrites --phase

 --outname STRING  identifier for imputation run (mandatory)



## differen prephasing algorithm

 --eagle           use eagle for prephasing: https://data.broadinstitute.org/alkesgroup/Eagle/
                   will still be using impute2 for imputation 


## different imputation algorithm

minimac3 is now default imputation algorithm (its no flag any more), here eagle will be used for prephasing
                   prephasing will happen reference informed with phased vcf (switched on be default)

 --impute2         use impute4/shapeit2 for prephasing / imputation 

 --impute4         use impute4/shapeit3 for prephasing / imputation 
                       (right now done with shapeit2, since trios do not work with shapeit3)

 --noref           use minimac3 and eagle but no reference during prephasing 
                      (some SNPs are not well matched with eagle but are with minimac)



#### for trio datasets

 --triset STRING  for subset of trio datasets (can contain bimfiles)

## outdated (switched off): --spliha INT      split haplotypes with N individuals


#### special handling of males on chr23 (haploid)

 --chr23male       some special options for phasing and postimputation

 --chr23trios      some special options for phasing trios on the X (in addition to --impute4 above)


##### alignment to reference:

  --popname STRING    important for freq-checks, either 
                           eur (default), 
                           asn (asian), 
                           amr (america), 
                           afr (africa), 
                           asw (african american)

  --sfh FLOAT         secure frequency around 50% (default: $sec_freq)
                                for checkflip (compare to reference),
                                only applied to AT/CG SNPs
  --fth FLOAT         frequency-diff to exclude SNPs, default $fth_th
                                for checkflip (compare to reference)



#### imputation:

  --buffer  INT       buffer in kb, default = $buffer


### outsourcing imputation, still doing alignment and postimp cleaning within the pipeline

  --deploy

 
#### post - imputation cleaning

  --info_th FLOAT  threshold for infoscore, default = $info_th

  --freq_th FLOAT  threshold for frequence (cases and controls), default = $freq_th

  --bg_th FLOAT    threshold for frequence (cases and controls), default = $bg_th


### technical options

  --refiex FILE       file containing refinds to exclude

  --sjamem_incr INT   increase all memore requests by INT Mb in steps of 1000 (1000 is 1Gb)
                           definitely necessary when not imputing with minimac3, since chunks are much bigger now
                           --sjamem_incr 4000 for 4GB

  --noclean           do not clean up intermediate files at the very end

  --force1            do not exit if same fail, but do this only once

  --sleep INT         sleep for INT seconds, try this if you think there is a race condition
                       (hints: stops a different steps, --serial works)
                       try using 5 seconds first.

  --serial            no sending jobs to queue all in one run
                          -> usually only used for testing  
  --sepa INT          use INT number of parallel jobs during serial


  --chunk INT         take chunks INT times bigger than original (default is $chunkind)
                            possilbe values are 1,2,5,10,20

  --multi INT         use multithreading of INT cores for prephasing (maybe later also for imputation
                      
  --minilong          use long jobs for minimac imputation (is better than multithreaded according to gonzalo): 24 hours
                      also works for vcf3dos (minimac3 and deployed)

  --phlong            use long jobs for phasing (instead of multithreading)

  --impwallinc INT    add INT hour to walltime of imputation jobs (also on top of minilong)

  --reference_info    write out file refiex_templ to be used with --refiex
                        cave: different for different values of --chunk
                        if already present, remove file reference_info and restart

  --onlyalign         stop after alignment (no prephasing or imputation), to be used for imputation servers

  --debug             extended output

### remarks 

  --phase is not mandatory any more, refdir is fine as well

  --outname is mandatory!!





 created by Stephan Ripke 2009 at MGH, Boston, MA

";

# Overview of the numeric --phase codes, printed when --phase 0 is given
# without --refdir.  The @ref_coll entries are interpolated when this string
# is built, so it has to stay after the @ref_coll assignments.
# NOTE(review): code 515 appears twice in this list (HLA_1KG and CHR8_9_1KG);
# both lines show the same directory because $ref_coll[515] is assigned twice.
my $phase_txt = "
 --phase options

   2 for phase 2
   3 for phase 3
   99 for AA; 
   999 for AA_CNV; 
   4 for 1KG (likelihoods)
   5 for 1KG (phased)

   10 for fin; 
   20 for uber

   15 HLA   

   -1 test HM3
   -5 test phased 1KG


   1KG:        5 -> $ref_coll[5]
   1KG-june:   6 -> $ref_coll[6]
   1KG-aug10: 7 -> $ref_coll[7]
   1KG-aug10: 8 -> $ref_coll[8]
   1KG-aug10-mars-window: 8882 -> $ref_coll[8882]
   1KG-aug10-mhc-window: 8883 -> $ref_coll[8883]


#   1KG-mar11: 88 -> $ref_coll[88]    -> so not use!
   1KG-mar11: 881 -> $ref_coll[881]

   1KG-mar11-3Mb: 882 -> $ref_coll[882]
   1KG-mar11-test2: 8821 -> $ref_coll[8821]
   1KG-mar11-3Mb-NOD2: 8822 -> $ref_coll[8822]
   1KG-mar11-3Mb-best-basam: 88222 -> $ref_coll[88222]
   1KG-mar11-3Mb-chr3-itih: 882222 -> $ref_coll[882222]
   1KG-mar11-3Mb-chr19-ncan: 882223 -> $ref_coll[882223]


   HM3_test: 333 -> $ref_coll[333]
   HM3:      334 -> $ref_coll[334]
   HM3_chr2_test: 335 -> $ref_coll[335]
   HM3_chrX: 323 -> $ref_coll[323]
   HM3_test:3333 -> $ref_coll[3333]
  HM3_SDCCA:3331 -> $ref_coll[3331]
  HM3_SDCCA_mars_window:3332 -> $ref_coll[3332]



   HLA_types: 15 -> $ref_coll[15]
   HLA_types_0813: 152 -> $ref_coll[152]
   HLA_types_0813_hg19: 1521 -> $ref_coll[1521]
   HLA_0813_HM3_hg18: 1522 -> $ref_coll[1522]
   HLA_1KG:  515 -> $ref_coll[515]



   MIR137_1KG:  511 -> $ref_coll[511]
   MIR708_1KG:  512 -> $ref_coll[512]
   HBII-108-1KG:513 -> $ref_coll[513]
   CHR10_11_1KG:514 -> $ref_coll[514]
   CHR8_9_1KG:  515 -> $ref_coll[515]
   CACNA1C_1KG: 516 -> $ref_coll[516]
   CSMD1_1KG:   517 -> $ref_coll[517]
   TCF4_1KG:    518 -> $ref_coll[518]
   CHR2_20_1KG: 519 -> $ref_coll[519]
   SCZ_TOP:     520 -> $ref_coll[520]
   SIAE:        521 -> $ref_coll[521]
   1KG_test: 555 -> $ref_coll[555]

   CNV_HM3:        4 -> $ref_coll[4]
   CNV_HM3_test: 444 -> $ref_coll[444]

   HM3:        3 -> $ref_coll[3]


   HM2:        2 -> $ref_coll[2]
   HM2:        222 -> $ref_coll[222]
   HM3_WW:    311 -> $ref_coll[311]
   HM3_FINCEUTSI: 322 -> $ref_coll[322]



   1KG-feb12-3Mb-test: 9999 -> $ref_coll[9999]

   1KG-feb12-3Mb: 9 -> $ref_coll[9]
   1KG-aug12-3Mb: 91 -> $ref_coll[91]

   1KG-aug12-3Mb_chr23: 9123 -> $ref_coll[9123]



   1KG-aug12-3Mb-1batch: 9111 -> $ref_coll[9111]
   1KG-aug12-3Mb_chr23: 91231 -> $ref_coll[91231]


   1KG-feb12-3Mb-chr23: 923 -> $ref_coll[923]
   1KG-feb12-3Mb-chr23_pseudo: 9231 -> $ref_coll[9231]



   HM3(imp):        39 -> $ref_coll[39]
   HM3(imp).3mb:        399 -> $ref_coll[399]




    ";



# minimac3 is the default imputation engine; the flag is cleared after option
# parsing when --impute2 or --impute4 is requested.
my $mm3_sw = 1;


use Getopt::Long;

# Parse the command line.  Options with a default write into the variables
# declared above; pure switches declare their variable right here with
# "\my $name", which stays visible for the rest of the file.
# GetOptions returns false when it meets an unknown option or a value of the
# wrong type (it has already printed a warning by then).  Stop in that case:
# continuing would silently run a long imputation with default settings.
GetOptions(

    # post-imputation thresholds and resources
    "sjamem_incr=i"  => \$sjamem_incr,
    "info_th=f"      => \$info_th,
    "freq_th=f"      => \$freq_th,
    "bg_th=f"        => \$bg_th,
    "triset=s"       => \my $trioset_file,

    "help"           => \my $help,
    "serial"         => \my $serial_sw,
    "sepa=i"         => \$sepa,

    "sleep=i"        => \my $sleep_sw,

    "buffer=i"       => \$buffer,

    # run identification and reference selection
    "outname=s"      => \my $outname,
    "deploy"         => \my $deploy,
    "refdir=s"       => \my $refdir_str,
    "phase=i"        => \$phas,

    # alignment checks
    "sfh=f"          => \$sec_freq,
    "fth=f"          => \$fth_th,

    "spliha_n=i"     => \$spliha_n,
    "noclean"        => \my $noclean,
    "force1"         => \my $force1,

    # prephasing / imputation engines
    "eagle"          => \my $eagle_sw,
    "minimac3"       => \my $mm3_sw_outdated,
    "impute4"        => \my $imp4_sw,
    "impute2"        => \my $imp2_sw,
    "noref"          => \my $noref,

    # technical options
    "popname=s"      => \$popname,
    "refiex=s"       => \my $refiex_file,
    "chunk=s"        => \$chunkind,
    "multi=i"        => \$multithread_v2,
    "minilong"       => \my $minilong,
    "impwallinc=i"   => \$impwallinc,
    "phlong"         => \my $phaselong,
    "reference_info" => \my $reference_info_file,
    "chr23male"      => \my $chr23male,
    "chr23trios"     => \my $chr23trios,
    "onlyalign"      => \my $onlyalign,
    "debug"          => \my $debug,
) or die "Error: invalid command line options (see messages above); run $0 --help for usage\n";



# --serial on the command line switches serial mode on as well.
$serial = 1 if ($serial_sw);

# --minimac3 is only kept so that old command lines get a clear message.
if ($mm3_sw_outdated) {
    print "--minimac3 is outdated (its default now), please start without this flag\n";
    exit;
}

# Either of the impute2 / impute4 switches turns the minimac3 default off.
$mm3_sw = 0 if ($imp2_sw || $imp4_sw);

# Optional pause before anything else happens (--sleep).
if ($sleep_sw) {
    print "sleeping for $sleep_sw seconds (only use if suspect of race condition)\n";
    sleep($sleep_sw);
}

# The chunk summary file name carries the chunk factor as a suffix.
$suminfo_c = $suminfo_c . "_$chunkind";
#print "$suminfo_c\n";
#exit;

############################################################
## testing binaries
##############################################################
# Names of the helper programs used by the pipeline.  Each name is kept in
# its own variable because later steps build their command lines from them.
my $readref_script        = "my.readref";
my $readrefsum_script     = "my.readref_sum";
my $buigue_script         = "buigue";
my $checkpos_script       = "checkpos6";
my $checkflip_script      = "checkflip4";
my $chuck_script          = "my.chuck2";
my $preph_script          = "my.preph3";
my $imp_script            = "my.imp.3";
my $dos_script            = "haps2dos4";
my $prepdep_script        = "my.prepdep";
my $vcf2dos_script        = "vcf2dos";
my $impprob_script        = "impprob_to_2dos";
my $dabg_script           = "daner_bg3";
my $cobg_script           = "bcomb_5_p2";
my $cobg_gw_script        = "comb_bg_dir_1";
my $prune_script          = "my.prune";
my $merge_script          = "my.merge";
my $pseudo_script         = "haps2pseudo2";
my $lift_script           = "lift18219";
my $trisha_script         = "trio2shape";
my $splithap_script       = "splithap_1";
my $cleandir_script       = "my.cleandir";
my $cleanerrandout_script = "my.cleanerrandout";
my $pdflatex_script       = "pdflatex";
my $mystart_script        = "my.start_job";
my $mutt_script           = "mail";
my $du_script             = "my.du";
my $blue_script           = "blueprint";
my $preploo_script        = "my.preploo2";
my $qacct_script          = "my.qacct";
my $performance_script    = "jobperformance";
my $vcfsort_script        = "vcf-sort";

# Programs that must be found in PATH before the pipeline starts.  The order
# determines the order of the messages printed by the check that follows.
# The mail program is checked separately; $prepdep_script is not checked.
my @test_scripts = (
    $readref_script,
    $impprob_script,
    $readrefsum_script,
    $buigue_script,
    $checkpos_script,
    $checkflip_script,
    $chuck_script,
    $preph_script,
    $imp_script,
    $dos_script,
    $vcf2dos_script,
    $dabg_script,
    $cobg_script,
    $cobg_gw_script,
    $prune_script,
    $merge_script,
    $pseudo_script,
    $lift_script,
    $trisha_script,
    $pdflatex_script,
    $splithap_script,
    $cleandir_script,
    $cleanerrandout_script,
    $du_script,
    $mystart_script,
    $blue_script,
    $preploo_script,
    $qacct_script,
    $performance_script,
);

# vcf-sort is only required when imputation is outsourced (--deploy).
push @test_scripts, $vcfsort_script if ($deploy);

#push @test_scripts, $mutt_script ;


#print "$rp_logo\n";

# Put this module's name into the banner and print it.
$rp_header =~ s/MODULE/impute_dirsub/;
print "$rp_header\n";


print ".......testing necessary binaries....\n" if ($debug);
my @miss_scripts;

# For every required program take the first PATH directory that holds an
# executable regular file of that name; remember the ones not found at all.
SCRIPT: foreach my $wanted (@test_scripts) {

    foreach my $dir ( split /:/, $ENV{PATH} ) {
        next unless ( -f "$dir/$wanted" && -x _ );
        print "$wanted\tfound in $dir\n" if ($debug);
        next SCRIPT;
    }

    push @miss_scripts, "cp /home/unix/sripke/bin/$wanted ./\n";
    print "!!Error!! : No $wanted command available\n" ;
}


# Anything missing: write copy commands for the missing programs into a
# helper file and stop.
if (@miss_scripts) {
    print "please remove this file and restart: get_scripts_on_broad.txt\n"
        if (-e "get_scripts_on_broad.txt");

    open(my $list_fh, ">", "get_scripts_on_broad.txt") or die $!;
    foreach my $cp_line (@miss_scripts) {
        print {$list_fh} "$cp_line";
    }
    close $list_fh;

    print "exiting now -> have a look at get_scripts_on_broad.txt\n";
    exit;
}






print ".......testing email program....\n" if ($debug);

my $err_scr = 0;
my $noti =1;
{
    # Returns the full path of the first executable regular file called
    # $prog in PATH, or the empty string when there is none.
    my $locate = sub {
        my ($prog) = @_;
        foreach my $dir ( split /:/, $ENV{PATH} ) {
            if ( -f "$dir/$prog" && -x _ ) {
                print "$prog\tfound in $dir\n" if ($debug);
                return "$dir/$prog";
            }
        }
        return '';
    };

    # First choice is the configured mail program, second choice is mutt.
    # Without either one, e-mail notifications are switched off.
    if ( !$locate->($mutt_script) ) {

        print "!!Warning!! : No $mutt_script command available, trying mutt\n" if ($debug);

        $mutt_script = "mutt";
        if ( !$locate->($mutt_script) ) {
            print "!!Warning!! : No $mutt_script command available, no email notifications\n";
            $noti = 0;
        }
    }

}
die if $err_scr == 1;


print "....all necessary binaries found....\n" if ($debug);
print "------------------------------------\n" if ($debug);
#push @scripts,"id_tager_3";



#####################################
# testing environment variable rp_perlpackages
####################################

# The installation is expected to have set rp_perlpackages; stop with a hint
# when it is absent.
print "testing environment variable rp_perlpackages....\n" if ($debug);
if (!exists $ENV{rp_perlpackages}) {
    print "Error: no environment variable for perl-packages, please re-install ricopili and make sure to follow all instructions\n" ;
    print "------------------------------------\n" ;
    exit;
}
if ($debug) {
    print "....all set....\n";
    print "------------------------------------\n";
}















my $nomega_sw = 1;
my $nomega = $nomega_sw ? 1 : 0;


# --help, or a missing --outname, ends the run with the usage text.
die $usage if ($help || !$outname);

# Without --refdir a usable --phase is required:
#   -1 (nothing given) -> usage text, 0 -> list of phase codes.
if (!$refdir_str) {
    if ($phas == -1) {
        print "$usage\n";
        exit;
    }
    if ($phas == 0) {
        print "$phase_txt\n";
        exit;
    }
}

# Phase code 9 is refused.
if ($phas == 9) {
    print "please do not use old reference any more\n";
    exit;
}

my $p2_txt = ($phas == 2) ? "--phase2" : "";


# An explicit --refdir takes precedence over the directory of the phase code.
my $refdir = $refdir_str ? $refdir_str : $ref_coll[$phas];

if (!-d $refdir) {
    print "reference directory ($refdir) is not existing\n";
    exit;
}


# Working directory for the imputation steps, relative to the start directory.
my $impute_dir = "pi_sub";


#my $postimp_dir = "$impute_dir/postimp_data";







# Shuffles an array in place (Fisher-Yates).
#   $_[0]  reference to the array to shuffle
# Walks from the last position down to the first and swaps each element with
# a randomly chosen one at or below that position.  The return value carries
# no meaning.
sub fisher_yates_shuffle {
    my ($cards) = @_;
    for my $pos (reverse 0 .. $#{$cards}) {
        my $pick = int rand($pos + 1);
        @{$cards}[$pos, $pick] = @{$cards}[$pick, $pos];
    }
}


#####################################
# print array to file
####################################

# Writes the given strings to a file, replacing any previous content.
# No separators or newlines are added.
#   $file   path of the file to write
#   @lines  strings to write, in order
# Dies when the file cannot be opened or the data cannot be flushed.
sub a2file {
    my ($file, @lines) = @_;

    # Three-argument open with a lexical handle: the name is taken literally
    # (no mode characters or surrounding whitespace are interpreted) and no
    # global handle is shared with other code.
    open(my $out_fh, '>', $file) or die "cannot write $file: $!";
    foreach my $line (@lines) {
        print {$out_fh} $line;
    }
    # Buffered write errors (e.g. full disk) only show up at close.
    close($out_fh) or die "cannot close $file: $!";
    return;
}


###################################################
###  system call with test if successfull
###################################################
# Runs a shell command and dies unless it finished successfully.
#   @_  pieces of the command; they are joined with single spaces
# Dies when the command cannot be started, when it is terminated by a
# signal, or when it exits with a non-zero status.
# The former empty prototype "()" was dropped: the sub takes arguments, and
# all callers use the "&mysystem(...)" form, which ignores prototypes anyway.
sub mysystem {
    my ($systemstr) = "@_";
    system($systemstr);

    # Keep both values before anything else can overwrite them.
    my ($wait_status, $os_error) = ($?, $!);

    die "$systemstr\n->system call failed: could not execute: $os_error"
        if ($wait_status == -1);

    # The low seven bits hold the signal number; looking at "$? >> 8" alone
    # would report a killed job as a success.
    my $signal = $wait_status & 127;
    die "$systemstr\n->system call failed: terminated by signal $signal"
        if ($signal != 0);

    my $status = ($wait_status >> 8);
    die "$systemstr\n->system call failed: $status" if ($status != 0);
    return;
}


##########################################
# subroutine to split a plink-output-line
##########################################

# Splits one line of whitespace-separated output into its fields.
#   $_[0]  the line; a trailing newline and leading whitespace are ignored
# Returns the list of fields (in scalar context: the number of fields).
sub split_line {
    my ($text) = @_;
    chomp $text;
    $text =~ s/^[\s]+//g;
    my @fields = split /\s+/, $text;
    return @fields;
}

##########################################
# subroutine to split a plink-output-line with references
##########################################

# Reference flavour of split_line.
#   $_[0]  reference to the line; the referenced string is left untouched
# Returns a reference to a new array holding the whitespace-separated fields
# (trailing newline and leading whitespace are ignored).
sub split_line_ref {
    my ($line_ref) = @_;
    my $text = ${$line_ref};
    chomp $text;
    $text =~ s/^[\s]+//g;
    return [ split /\s+/, $text ];
}





#####################################
# print array to file with newline
####################################

# Writes the given strings to a file, one per line, replacing any previous
# content.
#   $file   path of the file to write
#   @lines  strings to write; a newline is appended to each
# Dies when the file cannot be opened or the data cannot be flushed.
sub a2filenew {
    my ($file, @lines) = @_;

    # Three-argument open with a lexical handle: the name is taken literally
    # and no global handle is shared with other code.
    open(my $out_fh, '>', $file) or die "cannot write $file: $!";
    foreach my $line (@lines) {
        print {$out_fh} "$line\n";
    }
    # Buffered write errors (e.g. full disk) only show up at close.
    close($out_fh) or die "cannot close $file: $!";
    return;
}


#####################################
# append array to file with newline
####################################

# Appends the given strings to a file, one per line; the file is created
# when it does not exist yet.
#   $file   path of the file to append to
#   @lines  strings to append; a newline is appended to each
# Dies when the file cannot be opened or the data cannot be flushed.
sub a2filenew_app {
    my ($file, @lines) = @_;

    # Three-argument open with a lexical handle: the name is taken literally
    # and no global handle is shared with other code.
    open(my $out_fh, '>>', $file) or die "$!: $file";
    foreach my $line (@lines) {
        print {$out_fh} "$line\n";
    }
    # Buffered write errors (e.g. full disk) only show up at close.
    close($out_fh) or die "cannot close $file: $!";
    return;
}

#####################################
# subroutine to count lines of a file
#####################################

# Counts the lines of a file.
#   $file  path of the file to read
# Returns the number of lines; a last line without a trailing newline counts
# as well.  Dies when the file cannot be opened.
sub count_lines {
    my ($file) = @_;
    my $lc = 0;

    # Lexical handle and three-argument open: a caller that has the global
    # handle FILE open is not disturbed, and the name is taken literally.
    open(my $in_fh, '<', $file) or die "$file: ".$!;
    while (my $line = <$in_fh>) {
        $lc++;
    }
    close $in_fh;
    return $lc;
}



#####################################
# subroutine to re-invoke this script
#####################################

# Logs a progress message and resubmits this script through the blueprint
# script; never returns.
#   $message  short step description, written to the impute_dir_info log
#   $wt_file  passed to blueprint as --fwt; on "bsub" everything up to and
#             including "blueprint_joblist_file-" is stripped first
# Dies with "2 times already" when the third-last log line (read before the
# new line is appended) already contains the same message for this run.
sub reinvo_b {
    my ($message, $wt_file) = @_;
    my $now = localtime time;
    my $old_cmd = `tail -3 $loloc/impute_dir_info | head -1`;

    my $message_part = $info_txt."\t$message";
    $message = $info_txt."\t$message\t$now";

    &a2filenew_app("$loloc/impute_dir_info", $message);

    # \Q..\E: the text holds paths and a command line, so it has to be
    # matched literally.  Unquoted, characters such as "+", "(" or "[" would
    # be read as regex syntax and break the repeat detection.
    die "2 times already" if ($old_cmd =~ /\Q$message_part\E/);

    chdir "$rootdir" or die "something strange";
    if ($qloc eq "bsub") {
        $wt_file =~ s/.*blueprint_joblist_file-//;
    }

    my $sys_re = "$blue_script --njob $job_bn_th -b \"$command_line\" --wa 4 --di -j --fwt $wt_file --na _if_$outname";
    &mysystem ($sys_re);
    exit;

}


#####################################
# send jobs to cluster and also send navi again
#####################################

# Parameters of send_jobarray.  They are file-level variables that the
# caller fills in before calling the sub; send_jobarray refuses to run while
# $sjadir, $sjaname, @sjaarray, $sjamem, $sjatime or $sjainfotxt still hold
# the placeholder values assigned here.
my $sjadir = "";
my $sjaweek = 0;
my $sjaname = "";
my $sjarow = "";
my @sjaarray;
my $sjamem = 0;
my $sjacores = 16;
my $sjamaxpar = 0;

my $sjatime = -1;
# Upper limit for the --njob value handed to the blueprint script.
my $sjamaxjobs = 30000;


if ($qloc eq "qsub") {
    $sjamaxjobs = 8000;
}


# Progress log; every submitted step appends one line.  The file has to
# exist already, it is not created here.
my $sjainfofile = "$loloc/impute_dir_info";
unless (-e $sjainfofile) {
    print "log-file ($sjainfofile) is not existing\n";
    print "please check loloc in ~/ricopili.conf\n";
    exit;
}
#my $sjainfofile = "$homedir/impute_dir_info_35_test";
my $sjainfotxt = "";
my $sjamulti = 0;


# Runs or submits one pipeline step and then re-invokes this script; never
# returns (every path ends in exit or die).
#
# Takes no arguments.  The step is described by file-level variables:
#   $sjadir      directory the jobs are started from
#   $sjaname     step name; "finished" marks the end of the pipeline
#   @sjaarray    one shell command per job
#   $sjamem      memory request ($sjamem_incr is added)
#   $sjatime     walltime, passed to blueprint as --wa
#   $sjainfotxt  prefix of the log line (start directory and command line)
#   $sjaweek, $sjamulti, $sjamaxpar, $sjacores   optional blueprint settings
#
# Flow:
#   1. "finished": write success_file, send a mail if possible, log, exit.
#   2. Write the job list into $sjadir and run it, either locally in batches
#      of $sepa background jobs (serial mode) or through blueprint.
#   3. Append a line to the log.  If the previous last log line already
#      describes the same step with the same number of jobs, the step made
#      no progress: write error_file, send a mail and stop (unless --force1).
#   4. Re-invoke the pipeline: directly in serial mode, otherwise as a
#      blueprint job that waits for the submitted job array.
sub send_jobarray {

    die "send_jobarray with undefined variables, dir" if ($sjadir eq "");
    die "send_jobarray with undefined variables, name" if ($sjaname eq "");
    die "send_jobarray with undefined variables, array" if (@sjaarray == 0);
    die "send_jobarray with undefined variables, mem" if ($sjamem == 0);
    die "send_jobarray with undefined variables, time" if ($sjatime < 0);
    die "send_jobarray with undefined variables, info" if ($sjainfotxt eq "");

    print "Running job: $sjaname\n";

    # Timestamp for the log line, blanks replaced to keep it one field.
    my $now = localtime time;
    $now =~ s/ /_/g;


    ## 1. end of pipeline ##############################################
    if ($sjaname eq "finished") {

        my $fini_message ;
        $fini_message .= "\n\n##################################################################\n";
        $fini_message .= "##### CONGRATULATIONS: \n";
        $fini_message .= "##### rp_pipeline finished successfully:\n";
        $fini_message .= "##### $sjainfotxt\n";
        $fini_message .= "##### now start with PCA (see README in subdir pcaer_sub/)\n";
        $fini_message .= "##### or directly with postimputation analysis\n";
        $fini_message .= "##### have a look at the wiki page\n"; 
        $fini_message .= "##### https://sites.google.com/a/broadinstitute.org/ricopili/\n";
        $fini_message .= "##################################################################\n";
        print "$fini_message\n";

        open(my $suc_fh, '>', "success_file") or die "cannot write success_file: $!";
        print {$suc_fh} $fini_message."\n";
        close($suc_fh) or die "cannot close success_file: $!";

        if ($noti == 1) {
            my $sys_success = 'cat success_file | '.$mutt_script.' -s RP_pipeline_finished '.$email ;
            &mysystem ($sys_success) ;
        }

        my $sjarow = $sjainfotxt."\t$sjaname\t$now";
        &a2filenew_app("$sjainfofile",$sjarow);

        exit;
    }


    ## 2. write the job list ###########################################
    # Everything below writes into and submits from $sjadir; without this
    # check a failed chdir would scatter the files over the current directory.
    chdir ($sjadir) or die "cannot change into $sjadir: $!";

    # Never overwrite an earlier job list of the same step.
    my $jobfile = "$sjaname.job_list";
    while (-e $jobfile) {
        $jobfile .= ".s";
    }

    &a2filenew ($jobfile, @sjaarray);

    my $nsja = @sjaarray;

    # Number of jobs reported to blueprint, capped at the queue limit.
    my $nsja_loc = $nsja;
    if ($nsja_loc > $sjamaxjobs) {
        $nsja_loc = $sjamaxjobs;
    }

    my $multi_txt = "";
    if ($sjamulti > 0) {
        $multi_txt = "--multi $nsja_loc,$sjamulti";
    }

    $sjamem = $sjamem + $sjamem_incr;

    my $sja_week_str = "";
    if ($sjaweek > 0) {
        $sja_week_str = "--week 1";
    }


    ## run or submit ###################################################
    if ($serial) {

        print "starting step $sjaname with ".@sjaarray." jobs\n" if ($debug);
        print "running up to $sepa parallel jobs.\n" if ($debug);

        # --sepa 0 would otherwise abort with "Illegal modulus zero".
        my $batch_size = $sepa || 1;

        my $jc = 1;
        my @job_sepa_arr;

        # Collect the jobs as background commands; after every $batch_size
        # jobs write the batch into a small script ending in "wait" and run it.
        foreach my $job (@sjaarray) {
            print "running job $jc...\n" if ($debug);
            push @job_sepa_arr, "$job &";

            if ($jc % $batch_size == 0) {
                push @job_sepa_arr, "wait";
                my $sepa_file = "$sjaname.sepa.$jc";
                &a2filenew ($sepa_file,@job_sepa_arr);
                print "sepa_file: ".$sepa_file."\n" if ($debug);
                &mysystem("chmod u+x $sepa_file");
                &mysystem("./$sepa_file");
                @job_sepa_arr = ();
            }
            $jc++;
        }

        # Remaining jobs of an incomplete last batch.
        if (@job_sepa_arr > 0) {
            $jc--;
            push @job_sepa_arr, "wait";

            my $sepa_file = "$sjaname.sepa.$jc";
            &a2filenew ($sepa_file,@job_sepa_arr);
            print "sepa_file: ".$sepa_file."\n" if ($debug);
            &mysystem("chmod u+x $sepa_file");
            &mysystem("./$sepa_file");
        }

    }
    else {

        my $sys_loc = "$blue_script $sja_week_str --maxpar $sjamaxpar --noerr --njob $nsja_loc --array $jobfile --wa $sjatime --mem $sjamem --cores $sjacores --j --na $jobfile $multi_txt";
        &mysystem ($sys_loc);
    }


    ## 3. log the step and detect missing progress #####################
    # Last line of the log before the new line is appended.
    my $old_cmd = `tail -1 $sjainfofile | head -1`;

    my $nsja_txt = sprintf "%06d",$nsja;

    my $sjacontent = "$sjaname.".$nsja_txt;

    my $sjarow_part = $sjainfotxt."\t$sjacontent";
    my $sjarow      = $sjainfotxt."\t$sjacontent\t$now";

    &a2filenew_app("$sjainfofile",$sjarow);

    # \Q..\E: the text holds the start directory and the command line and has
    # to be matched literally; unquoted, characters such as "+", "(" or "["
    # would be read as regex syntax and break the repeat detection.
    if ($old_cmd =~ /\Q$sjarow_part\E/){
        unless ($force1 ){
            my $err_message ;
            $err_message .= "##################################################################\n";
            $err_message .= "##### Error: \n";
            $err_message .= "##### step $sjaname has been done repeatedly without any progress\n";
            $err_message .= "##### imputation pipeline stopped: $command_line\n";
            $err_message .= "##### $sjainfotxt\n";
            $err_message .= "##### if reason does not appear obvious\n";
            $err_message .= "##### have a look at the wiki page\n"; 
            $err_message .= "##### https://sites.google.com/a/broadinstitute.org/ricopili/\n";
            $err_message .= "##### or contact the developers\n";
            $err_message .= "##### version: $rp_version\n";
            $err_message .= "##################################################################\n";
            print "$err_message\n";

            open(my $err_fh, '>', "error_file") or die "cannot write error_file: $!";
            print {$err_fh} $err_message."\n";
            close($err_fh) or die "cannot close error_file: $!";

            if ($noti == 1) {
                &mysystem ('cat error_file | '.$mutt_script.' -s RP_pipeline_error '.$email) ;
            }

            exit;
        }
    }


    ## 4. re-invoke the pipeline #######################################
    # --force1 is meant to work only once, so it is not passed on.
    $command_line =~ s/--force1//;

    my $wt_file = "$sjadir/j.$jobfile.id";
    chdir "$rootdir" or die "something strange";

    if ($serial) {
        my $sys_re = "$command_line";
        &mysystem ($sys_re);
        exit;
    }
    else {
        # Pick a job name that is not in use in the start directory yet.
        my $motherjobfile = "_if_$outname";
        while (-e "j.$motherjobfile") {
            $motherjobfile .= ".s";
        }
        my $sys_re = "$blue_script --njob $job_bn_th -b \"$command_line\" --wa 2 --di -j --fwt $wt_file --na $motherjobfile";
        &mysystem ($sys_re);
    }


    print "------------------------------------------------------------\n";
    print "$nsja jobs successfully submitted\n";
    print "please see tail of $sjainfofile for regular updates\n";
    print "also check bjobs -w for running jobs\n";
    print "possibly differnt command on different computer cluster: e.g. qstat -u USER\n";
    print "you will be informed via email if errors or successes occur\n";
    print "------------------------------------------------------------\n";

    exit;


}




#####################################
# subroutine to re-invoke this script
#####################################

# Same as reinvo_b, but resubmits with "--week 1" and "--wa 10": logs a
# progress message and resubmits this script through the blueprint script;
# never returns.
#   $message  short step description, written to the impute_dir_info log
#   $wt_file  passed to blueprint as --fwt; on "bsub" everything up to and
#             including "blueprint_joblist_file-" is stripped first
# Dies with "2 times already" when the third-last log line (read before the
# new line is appended) already contains the same message for this run.
sub reinvo_b_week {
    my ($message, $wt_file) = @_;
    my $now = localtime time;
    my $old_cmd = `tail -3 $loloc/impute_dir_info | head -1`;

    my $message_part = $info_txt."\t$message";
    $message = $info_txt."\t$message\t$now";

    &a2filenew_app("$loloc/impute_dir_info", $message);

    # \Q..\E: the text holds paths and a command line, so it has to be
    # matched literally.  Unquoted, characters such as "+", "(" or "[" would
    # be read as regex syntax and break the repeat detection.
    die "2 times already" if ($old_cmd =~ /\Q$message_part\E/);

    chdir "$rootdir" or die "something strange";
    if ($qloc eq "bsub") {
        $wt_file =~ s/.*blueprint_joblist_file-//;
    }

    &mysystem ("$blue_script --week 1 --njob $job_bn_th -b \"$command_line\" --wa 10 --di -j --fwt $wt_file --na _if_$outname");
    exit;

}



##############################################
##############################################
#############  BEGIN
##############################################
##############################################


use Cwd;
use File::Path;
# Remember the directory the script was started in; the "*_done" marker
# files tested further down are looked up relative to it.
$rootdir = Cwd::cwd();
$sjainfotxt = "$rootdir\t$command_line";

my $pcaer_dir = "$rootdir/pcaer_sub";
my $archive_dir = "/archive/gwas/scz/archive/$outname";

# Create the two working directories if they are missing.
foreach my $dir_spec (["impute_dir", $impute_dir], ["pcaer_dir", $pcaer_dir]) {
    my ($label, $path) = @$dir_spec;
    next if (-e $path);
    print "$label is not existing, create one for you\n" if ($debug);
    mkpath($path, {verbose => 0, mode => 0750});
}



#unless (-e $archive_dir){
#    print "$archive_dir\n";
#    exit;
#    print "archive_dir is not existing, create one for you\n";
#    my @created = mkpath(   ## $created ?
##			    $archive_dir,
#			    {verbose => 0, mode => 0750},
#	);
#}

#exit;

#####################################
# create suminfo if not existing
#####################################

#unless (-e "$refdir/$suminfo_s"){
#    print "WARING: $refdir/$suminfo_s not existing\n";
#    chdir ($refdir);
#    &mysystem ("cat *.info_pos  | grep -v SNP > $suminfo");
#    &mysystem ("sort -k1,1 -u $suminfo > $suminfo_s");
#    chdir ($rootdir);
#}


#####################################
## if new frequency file is existing
###################################

#my $refvcf_s_v2 = "";

# Detect the "new format" reference directory: if $refdir/reference_templ
# exists, its lines are read as key/value pairs (first two fields returned by
# split_line) into %ref_hash.  The entry "out_template" is mandatory and
# becomes $suminfo_s_v2; $suminfo_s_v2_sw is set to 1 to switch the rest of
# the script to the v2 code paths.
my $suminfo_s_v2 = "";
my $suminfo_s_v2_sw = 0;

my %ref_hash;
if (-e "$refdir/reference_templ") {
    print "new format (reference_template) found.\n" if ($debug);
    die "cannot open $refdir/reference_templ: $!" unless open IN, "< $refdir/reference_templ";
    while (my $line = <IN>) {
	my @cells = &split_line($line);
	$ref_hash{$cells[0]} = $cells[1];
    }
    # The handle IN is reused by later sections; close it explicitly.
    close IN;

    unless (exists $ref_hash{"out_template"}) {
	print "Error: no entry out_template in (reference_template)\n";
	exit;
    }
    $suminfo_s_v2_sw = 1;
    $suminfo_s_v2 = $ref_hash{"out_template"};
}

print "reference_v2: $suminfo_s_v2_sw\n" if ($debug);
#print "refvcf_v2: $refvcf_s_v2\n";
#exit;
#####################################
## if new frequency file is existing
###################################
my $popname_uc = uc($popname);

# Legacy reference layout only: pick the frequency summary file, trying the
# population name as given first and its upper-case form second.
if ($suminfo_s_v2_sw == 0) {
    my ($sumfrq_pop) = grep { -e "$refdir/sumfrq.$_" } ($popname, $popname_uc);
    unless (defined $sumfrq_pop) {
        print "$refdir/sumfrq.$popname_uc in refdir is not existing!!!\n";
        die;
    }
    $suminfo_s = "sumfrq.$sumfrq_pop";
}

# The per-chunk count file is required in either layout.
if (!-e "$refdir/$suminfo_n") {
    print "Error: $refdir/$suminfo_n not existing\n";
    exit;
}


#unless (-e "$refdir/$suminfo_r"){
#    print "Warning: $refdir/$suminfo_r not existing\n";
#    die;
#    chdir ($refdir);
#    &mysystem ("ls sc_*.bgl > $suminfo_r");
#    chdir ($rootdir);
#}

#my @refallfiles;

#    opendir(DIR, "$refdir") || die "can't opendir .: $!";
#    @refallfiles = readdir(DIR);
#    closedir DIR;
#}
my $cc=0;

# Optional exclusion list: every line of $refiex_file (newline removed)
# becomes a key of %refiex.
my %refiex;
if ($refiex_file) {
    print "read $refiex_file\n" if ($debug);
    open(IN, "< $refiex_file") or die $!." <$refiex_file>";
    chomp(my @excluded = <IN>);
    close IN;
    @refiex{@excluded} = (1) x @excluded;
}







my @reffiles;

# Build the list of reference chunk files from $refdir/$suminfo_n.  The
# second field of each line is the file name; a trailing "info_pos" suffix is
# removed, chunks listed in %refiex are skipped, and so is the "total" line.
print "read $refdir/$suminfo_n....\n" if ($debug);
open(IN, "< $refdir/$suminfo_n") or die $!." <$refdir/$suminfo_n>";
REFLINE: while (my $line = <IN>) {
    my @cells = &split_line($line);
    die "problem with $refdir/$suminfo_n" if (@cells < 2);
    (my $bgl_file = $cells[1]) =~ s/.info_pos$//;

    if ($refiex_file) {
        # Reduce the file name to its chunk id: chrN_start_end when present,
        # otherwise chrN_start.
        my $refind = $bgl_file;
        my $chunk_pat = ($refind =~ /chr[0-9]*_[0-9]*_[0-9]*/)
            ? qr/chr[0-9]*_[0-9]*_[0-9]*/
            : qr/chr[0-9]*_[0-9]*/;
        $refind =~ s/.*($chunk_pat).*/$1/;

        if (exists $refiex{$refind}) {
            print "exclude: $bgl_file\n" if ($debug);
            next REFLINE;
        }
    }

    next REFLINE if ($bgl_file eq "total");

    push @reffiles, $bgl_file;
    $cc++;
}
close IN;
die "reference directory <$refdir> empty (no sc.*bgl)" if (@reffiles == 0);
#}










#print "finished reading $refdir/$suminfo_n\n";




#exit;
#print "sleep\n";
#sleep(5);


# Directory listing of the current directory (where the input bim files are
# looked for).
my @files = ();
opendir(DIR, ".") || die "can't opendir .: $!";
@files = readdir(DIR);
closedir DIR;

# Directory listing of the imputation working directory; skipped once the
# "puting_done" marker exists.
my @pi_files = ();

unless (-e "$rootdir/puting_done") {
    # Report the directory that actually failed, not ".".
    opendir(DIR, "$impute_dir") || die "can't opendir $impute_dir: $!";
    @pi_files = readdir(DIR);
    closedir DIR;
}


### input bim-files found in the start directory
my @bim_files = grep {/bim$/} @files;

# An input file that already carries the ".hg19.ch.fl" suffix would collide
# with the names this pipeline generates itself.  Rename the first such
# fileset to ".bf", print how to undo the rename, and stop.
foreach my $bim_name (@bim_files) {
    next unless ($bim_name =~ /.hg19.ch.fl.bim$/);

    print "wrong filename, will rename:\n";
    (my $obfile = $bim_name) =~ s/.bim$//;
    (my $nbfile = $obfile) =~ s/.hg19.ch.fl/.bf/;
    print "mv $obfile.bed/bim/fam $nbfile.bed/bim/fam\n";
    foreach my $ext ("fam", "bed", "bim") {
        &mysystem ("mv $obfile.$ext $nbfile.$ext");
    }
    print "to redo\n";
    foreach my $ext ("bim", "fam", "bed") {
        print "mv $nbfile.$ext $obfile.$ext\n";
    }

    exit;
}

#print "sleep\n";
#sleep(10);

# Sort the contents of the imputation directory into the marker/result
# classes the later steps look up.
my @bimfli_files  = grep {/.ch.fl.bim$/} @pi_files;
my @bimdep_files  = grep {/dep.fini$/} @pi_files;
my @bimpos_files  = grep {/.ch.bim$/} @pi_files;
my @bimref_files  = grep {/.bim.ref/} @pi_files;
my @bimhg19_files = grep {/.hg19.bim$/} @pi_files;
my @fini_files    = grep {/.fini$/} @pi_files;

# Once "puting_done" exists the directory was not listed, so derive the
# flipped bim-file names from the input bim-files instead.
if (-e "$rootdir/puting_done") {
    foreach my $bim (@bim_files) {
        (my $stem = $bim) =~ s/.bim$//;
        push @bimfli_files, "$stem.hg19.ch.fl.bim";
    }
}

print "@bimhg19_files\n" if ($debug);

### lookup tables (file name => 1), one per class
my %bimfli_array  = map { ($_ => 1) } @bimfli_files;    # flipped bim-files
my %bimdep_array  = map { ($_ => 1) } @bimdep_files;    # deploy markers
my %bimpos_array  = map { ($_ => 1) } @bimpos_files;    # position-checked bim-files
my %bimref_array  = map { ($_ => 1) } @bimref_files;    # readref outputs
my %bimhg19_array = map { ($_ => 1) } @bimhg19_files;   # hg19 bim-files
my %fini_array    = map { ($_ => 1) } @fini_files;      # generic .fini markers

## name for log-files: first flipped bim-file, else first input bim-file
$iname = ($bimfli_files[0] eq "") ? $bim_files[0] : $bimfli_files[0];
$iname =~ s/.bim$//;
$iname =~ s/qc2report_//;

#####################################
# prepare pi_subdir
#####################################

# Everything from here on runs inside the imputation directory.  Stop if the
# directory cannot be entered; otherwise the links below and all later steps
# would silently work in the wrong place.
chdir ($impute_dir) or die "cannot chdir to $impute_dir: $!";

# Link the input bed/bim/fam filesets from the start directory into the
# imputation directory (skipped once "puting_done" exists).
unless (-e "$rootdir/puting_done") {
    foreach (@bim_files) {
	my $bfile = $_;
	$bfile =~ s/.bim$//;
	&mysystem("ln -s $rootdir/$bfile.bim .") unless (-e "$bfile.bim");
	&mysystem("ln -s $rootdir/$bfile.bed .") unless (-e "$bfile.bed");
	&mysystem("ln -s $rootdir/$bfile.fam .") unless (-e "$bfile.fam");
    }
}


##########################################
## prepare trioset file
############################################

my %trioset;          # dataset prefixes listed in the trioset file
my %trioset_bimfli;   # subset of those with a flipped bim-file

# Each line of the trioset file names a dataset in its first field; a
# trailing bim/fam suffix is removed and ".hg19.ch.fl" appended so the name
# matches the flipped filesets.
if ($trioset_file) {
    open(IN, "< $rootdir/$trioset_file") or die $!." <$rootdir/$trioset_file>";
    while (my $line = <IN>) {
        my ($dataset) = &split_line($line);
        $dataset =~ s/.bim$//;
        $dataset =~ s/.fam$//;
        $dataset .= ".hg19.ch.fl";
        $trioset{$dataset} = 1;
        print "$dataset is triodata\n";
    }
    close IN;
}

# NOTE: the loop variable aliases the array elements, so this strips the
# bim suffix from the entries of @bimfli_files themselves.
for my $dataset (@bimfli_files) {
    $dataset =~ s/.bim$//;
    $trioset_bimfli{$dataset} = 1 if (exists $trioset{$dataset});
}


#######################################################
## set a single chromosome
######################################################

#my $chr_start=1;	
#my $chr_end=22;	
#if ($chr !=0 ){
#    $chr_start = $chr;
#    $chr_end = $chr;
#}




################################################
### set info text
####################################################


$info_txt = "command:\t\"$command_line\"\tdir:\t$rootdir";


############################################################
### refind arrays (parallel arrays, one entry per reference chunk)
#############################################################

my @refind_arr;     ## chunk ids (chrN_start_end)
my @refstart_arr;   ## chunk start
my @refend_arr;     ## chunk end

my @refchr_arr;     ## saves the chromosomes in same order
my @gema_arr;       ## saves the gema_files in same order

my %gema_hash;      ## chromosomes whose gema-file has been checked


# Legacy reference layout: derive the chunk id from each reference file name.
if ($suminfo_s_v2_sw == 0) {
    foreach my $rf (@reffiles) {

        my $refind = $rf;
        my $chunk_pat = ($refind =~ /chr[0-9]*_[0-9]*_[0-9]*/)
            ? qr/chr[0-9]*_[0-9]*_[0-9]*/
            : qr/chr[0-9]*_[0-9]*/;
        $refind =~ s/.*($chunk_pat).*/$1/;

        push @refind_arr, $refind;
        print "refind: $refind\n" if ($debug);

        my @tcells = split /_/, $refind;
        my ($chrind, $mega_start, $mega_end) = @tcells[0..2];
        $chrind =~ s/chr//;

        push @refchr_arr, $chrind;
        push @refstart_arr, $mega_start;
        push @refend_arr, $mega_end;

        my $gema_file = "$refdir/genetic_map_chr${chrind}_combined_b37.txt";
        push @gema_arr, $gema_file;

        ### test existence once per chromosome
        if (!exists $gema_hash{$chrind}) {
            $gema_hash{$chrind} = 1;
            die "$gema_file not existent" unless (-e $gema_file);
        }
    }
}



#############################################
### write out files for postimp
#############################################

# Write $rootdir/reference_info once: first line is the reference directory,
# followed by one chunk id per line.  The file is built under a temporary
# name and moved into place, then copied to refiex_templ and to one ".ref"
# file per flipped dataset.
if (!-e "$rootdir/reference_info") {

    print "write reference_info\n" if ($debug);

    open(REF, "> reference_info.tmp") or die $!;
    print REF "$refdir\n";

    if ($suminfo_s_v2_sw == 1) {
        # v2 layout: chunk ids come from the first three fields of $suminfo_c
        open(IN, "< $refdir/$suminfo_c") or die $!." <$refdir/$suminfo_c>";
        while (my $line = <IN>) {
            my @cells = &split_line($line);
            print REF join("_", "chr$cells[0]", $cells[1], $cells[2]), "\n";
        }
        close IN;
    }
    else {
        foreach my $refind (@refind_arr) {
            print REF "$refind\n";
        }
    }

    close REF;
    &mysystem ("mv reference_info.tmp $rootdir/reference_info");
    &mysystem ("cp $rootdir/reference_info $rootdir/refiex_templ");

    foreach my $bim (@bimfli_files) {
        (my $bprefix = $bim) =~ s/.bim$//;
        &mysystem ("cp $rootdir/reference_info $rootdir/$bprefix.ref");
    }
}




#    unless (-e "$rootdir/datasets_info") {
#	die $! unless open BF, "> datasets_info.tmp";
#	foreach (@bimfli_files) {
#	    print BF "$_\n";
#	}
#	close BF;
#	&mysystem ("mv datasets_info.tmp $rootdir/datasets_info");
 #   }

#foreach my $ccc (0..$#refind_arr) {
#    print "$ccc\n";
#}
#print "debug\n";
#exit;


# When only the refiex template was requested, stop here.
if ($reference_info_file) {
    print "exit after building refiex_templ\n";
    exit;
}

#####################################################
## check readref
########################################################

my @chr_in;   # chromosomes for which a frq2.gz file exists

if ($suminfo_s_v2_sw) {

    # Path of the per-chromosome frequency file: the first "XXX" in the
    # assembled path is replaced by the chromosome.
    my $frq2_for = sub {
        my ($chrloc) = @_;
        my $path = "$refdir/$suminfo_s_v2.impute.plink.$popname_uc.frq2.gz";
        $path =~ s/XXX/$chrloc/;
        return $path;
    };

    foreach my $chrloc (1..22) {
        my $reffi = $frq2_for->($chrloc);
        if (-e $reffi) {
            push @chr_in, $chrloc;
        }
        else {
            print "Warning: $reffi is not existing, skipping this chromosome\n" if ($debug);
        }
    }

    # No autosome found: test for an X-chromosome reference instead.
    if (@chr_in == 0) {

        my $chrloc = "X";
        my $reffi = $frq2_for->($chrloc);

        if (-e $reffi) {
            push @chr_in, $chrloc;
            print "found $reffi, this seems to be chrX imputation\n" if ($debug);
        }
        else {
            print "Warning: $reffi is not existing, skipping this chromosome\n" if ($debug);
        }

        if (@chr_in == 0) {
            print "Error: no frq2.gz file for this population found\n";
            exit;
        }
    }
}


#if ($readref_sw == 1) {
#    print "efficient reference alignment switched on\n";
##}
#else {
#    print "efficient reference alignment switched off, please check refdir, will continue in 3 sec...\n";
#    sleep(3);
#}
#print "exit;\n";
#exit;

###################################
### test for legend file for DEPLOY
###################################


# Deploying needs the genome-wide legend file of the reference; stop early
# with a hint when it is missing.
if ($deploy) {
    my $legend_path = "$refdir/$suminfo_s_v2.impute.legend";
    if (!-e $legend_path) {
        print "Error: this file is missing and necessary for deploying: $legend_path\n";
        print "please rerun refdir_navi with on the reference you are using with a newer ricopili version\n";
        exit;
    }
}



###################################
### GUESS BUILD
###################################

my @buigue_arr = ();    # build-guess commands still to run
my $buigue_fini = 0;    # datasets whose .bim.fini marker already exists

# One build-guess job per input bim-file without a ".bim.fini" marker.
# Skipped when either "buigue_done" or "posing_done" exists.
if (!-e "$rootdir/buigue_done" && !-e "$rootdir/posing_done") {
    foreach my $bim (@bim_files) {
        (my $bfile = $bim) =~ s/.bim$//;
        if (exists $fini_array{"$bfile.bim.fini"}) {
            $buigue_fini++;
        }
        else {
            push @buigue_arr, "$buigue_script --lift19 $bfile.bim";
        }
    }

    if (@buigue_arr > 0) {
        # hand the commands to the job-array submitter
        $sjadir = $impute_dir;
        $sjaname = "buigue";
        $sjatime = 2;
        $sjamem = 2000;
        @sjaarray = @buigue_arr;

        &send_jobarray;
    }
    else {
        &mysystem ("touch $rootdir/buigue_done");
        print "build_guess done\n" if ($debug);
    }
}


###################################
### READREF
###################################



my @readref_arr = ();   # readref commands still to run


# v2 layout only: one readref job per dataset and chromosome, unless the
# dataset already has its ".ref.sum.done" marker or the per-chromosome
# output is already listed in %bimref_array.
if ($suminfo_s_v2_sw == 1 && !-e "$rootdir/readref_done") {
    foreach my $bim (@bim_files) {
        (my $bfile = $bim) =~ s/.bim$//;
        my $accfli = "$bfile.hg19.bim";
        next if (exists $bimref_array{"$accfli.ref.sum.done"});

        foreach my $chrloc (@chr_in) {
            # NOTE: this upper-cases the script-wide $popname as a side effect.
            $popname = uc $popname;

            my $reffi = "$refdir/$suminfo_s_v2.impute.plink.$popname_uc.frq2.gz";
            $reffi =~ s/XXX/$chrloc/;

            # the output for chromosome X is named chr23
            my $chr_tag = ($chrloc eq "X") ? 23 : $chrloc;
            my $bimref = "$accfli.ref.chr$chr_tag";

            unless (-e $reffi) {
                print "Error: $reffi not existing\n";
                exit;
            }

            push @readref_arr, "$readref_script --chr $chrloc --ref $reffi $accfli"
                unless (exists $bimref_array{$bimref});
        }
    }

    if (@readref_arr > 0) {
        $sjadir = $impute_dir;
        $sjaname = "readref";
        $sjatime = 2;
        $sjamem = 2000;
        $sjamaxpar = 100;
        @sjaarray = @readref_arr;

        &send_jobarray;
    }
    else {
        &mysystem ("touch $rootdir/readref_done");
        print "readref done\n" if ($debug);
    }
}

#print "debug\n";
#exit;

###################################
### sum readref
###################################

# v2 layout only: one summary job per dataset that has no ".ref.sum.done"
# marker yet.
if ($suminfo_s_v2_sw == 1 && !-e "$rootdir/readrefsum_done") {

    my @readrefsum_arr = ();
    my $readrefsum_fini = 0;

    foreach my $bim (@bim_files) {
        (my $bfile = $bim) =~ s/.bim$//;
        my $accfli = "$bfile.hg19.bim";

        if (exists $bimref_array{"$accfli.ref.sum.done"}) {
            $readrefsum_fini++;
        }
        else {
            push @readrefsum_arr, "$readrefsum_script $accfli";
        }
    }

    if (@readrefsum_arr > 0) {
        $sjadir = $impute_dir;
        $sjaname = "reresum";
        $sjatime = 2;
        $sjamem = 1000;
        @sjaarray = @readrefsum_arr;

        &send_jobarray;
    }
    else {
        &mysystem ("touch $rootdir/readrefsum_done");
        print "readrefsum done\n" if ($debug);
    }
}

#print "debug\n";
#exit;



###################################
### CHECKPOS
##################################################
## !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
### checkpos6 needs the var_chr_renaming (see my.readref) -> done
###################################

my @chepos_arr = ();    # checkpos commands still to run
my $chepos_fini = 0;    # datasets already position-checked

# One checkpos job per dataset without a ".hg19.ch.bim" result.  The
# dataset-specific reference summary is used as SNP database when it exists,
# the reference-wide summary otherwise (with a different --dbcol selection).
unless (-e "$rootdir/posing_done") {
    foreach my $bim (@bim_files) {
        (my $bfile = $bim) =~ s/.bim$//;
        my $locref = "$bfile.hg19.bim.ref.sum";
        my $accfli = "$bfile.hg19.ch.bim";

        my $have_locref = (-e $locref);
        print "locref $locref is not existing! would be better if it did\n"
            if (!$have_locref && $debug);

        if (exists $bimpos_array{$accfli}) {
            $chepos_fini++;
            next;
        }

        my ($dbcol, $dbsnp) = $have_locref
            ? ("1,2,3,4,5", "$rootdir/$impute_dir/$locref")
            : ("1,8,9,3,5", "$refdir/$suminfo_s");
        push @chepos_arr, "$checkpos_script --dbcol $dbcol --dbsnp $dbsnp $bfile.hg19.bim";
    }

    if (@chepos_arr > 0) {
        $sjadir = $impute_dir;
        $sjaname = "chepos";
        $sjatime = 2;
        $sjamem = 8000;
        @sjaarray = @chepos_arr;

        &send_jobarray;
    }
    else {
        &mysystem ("touch $rootdir/posing_done");
        print "checkpos done\n" if ($debug);
    }
}

#print "debug\n";
#exit;

###################################
### CHECKFLIP
###################################

#print "checkflip3?\n";
#exit;

my @chefli_arr = ();    # checkflip commands still to run
my $chefli_fini = 0;    # datasets already flip-checked

# One checkflip job per dataset without a ".hg19.ch.fl.bim" result.  As for
# checkpos, the dataset-specific reference summary is preferred when present.
unless (-e "$rootdir/flipping_done") {
    foreach my $bim (@bim_files) {
        (my $bfile = $bim) =~ s/.bim$//;
        my $accfli = "$bfile.hg19.ch.fl.bim";
        my $locref = "$bfile.hg19.bim.ref.sum";

        my $have_locref = (-e $locref);
        print "locref $locref is not existing! would be better if it did\n"
            if (!$have_locref && $debug);

        if (exists $bimfli_array{$accfli}) {
            $chefli_fini++;
            next;
        }

        my $systmp;
        if ($have_locref) {
            $systmp = "$checkflip_script --dbcol 0,3,4,5 --fth $fth_th --sfh $sec_freq --info $rootdir/$impute_dir/$locref $bfile.hg19.ch.bim";
        }
        else {
            $systmp = "$checkflip_script --fth $fth_th --sfh $sec_freq --info $refdir/$suminfo_s $bfile.hg19.ch.bim";
            print "$systmp\n" if ($debug);
        }
        push @chefli_arr, $systmp;
    }

    if (@chefli_arr > 0) {
        $sjadir = $impute_dir;
        $sjaname = "chefli";
        $sjatime = 2;
        $sjamem = 8000;
        @sjaarray = @chefli_arr;

        &send_jobarray;
    }
    else {
        &mysystem ("touch $rootdir/flipping_done");
        print "checkflip done\n" if ($debug);
    }
}

print "N_bimfli: ".@bimfli_files."\n" if ($debug);
print "N_bim: ".@bim_files."\n" if ($debug);

#print "debug\n";
#exit;
    



###############################################################################
#### read reference info
###########################################################################
# %info_n maps the second field of each $suminfo_n line to its first field
# (presumably file name => line count; confirm against how the file is
# generated).  Not needed once "puting_done" exists.
my %info_n;
if (!-e "$rootdir/puting_done") {

    print "read reference_info into memory\n" if ($debug);
    open(IN, "< $refdir/$suminfo_n") or die $!." <$refdir/$suminfo_n>";

    while (my $line = <IN>) {
        my ($value, $key) = &split_line($line);
        $info_n{$key} = $value;
    }

    close IN;
}





###########################
#### here preparation of famfiles for shapeit
############################


# For every input dataset whose flipped fam-file exists, write three helper
# files next to it (unless the .idnum file is already there):
#   <prefix>.hg19.ch.fl.fam.idnum   fam-file with FID/IID replaced by a running
#                                   number and both parent columns set to 0
#   <prefix>.hg19.ch.fl.fam.transl  running number -> original FID IID
#   <prefix>.hg19.ch.fl.fam.n       number of individuals
# The .idnum file is written under a .tmp name and moved into place last, so
# its existence marks a completed run for the dataset.
print "prepare famfiles for shapeit\n" if ($debug);

unless (-e "$rootdir/puting_done") {
    foreach (@bim_files) {

	my $bprefix = $_;
	$bprefix =~ s/.bim$//;
	# "FID\tIID" -> sex code taken from the plink sex check (filled only
	# for the $phas values tested below)
	my %sex_hash = ();

	### include sex check for chrX
	if ($phas == 923 || $phas == 9123 || $phas == 91231) {
	    if (-e "$bprefix.hg19.ch.fl.fam") {
		# run plink --check-sex once; the .sexcheck file is reused afterwards
		unless (-e "$bprefix.hg19.ch.fl.sexcheck") {
		    my $sx = "$ploc/plink --silent --memory 2000  --bfile $bprefix.hg19.ch.fl --check-sex --out $bprefix.hg19.ch.fl";
		    &mysystem ($sx);
		}
		
		
		die $! unless open SI, "< $bprefix.hg19.ch.fl.sexcheck";
		# NOTE(review): every line is processed, including any header line.
		# Field 6 (index 5) is presumably plink's F estimate - confirm.
		# Values below 0.5 are coded 2, everything else 1.
		while (my $line = <SI>){
		    my @cells = &split_line($line);
		    if ($cells[5] < .5) {
			$sex_hash{"$cells[0]\t$cells[1]"} = 2;
		    }
		    else {
			$sex_hash{"$cells[0]\t$cells[1]"} = 1;
		    }
		}
		close SI;
	    }
	}
	
	if (-e "$bprefix.hg19.ch.fl.fam"){ 
	    unless (-e "$bprefix.hg19.ch.fl.fam.idnum") {
		# individuals are read from the input fam-file, not the flipped one
		die $! unless open FI, "< $bprefix.fam";
		die $! unless open FO, "> $bprefix.hg19.ch.fl.fam.idnum.tmp";
		die $! unless open FT, "> $bprefix.hg19.ch.fl.fam.transl";
		# running individual number, starting at 1 (local to this dataset)
		my $cc = 1;
		while (my $line = <FI>){
		    my @cells = &split_line($line);
		    print FO "$cc $cc"; 

		    # parent columns are always written as 0
		    print FO " 0"; 
		    print FO " 0"; 

#		    print FO " $cells[2]"; 
#		    print FO " $cells[3]"; 
		    # sex: value from the sex check when available, else the
		    # fam-file column; for the $phas values above a missing
		    # sex-check entry is fatal
		    if (exists $sex_hash{"$cells[0]\t$cells[1]"}){
			print FO " ".$sex_hash{"$cells[0]\t$cells[1]"}; 
		    }
		    else {
			print FO " $cells[4]"; 
			if ($phas == 923 || $phas == 9123 || $phas == 91231) {
			    print "Error: no sex-check on X-chr?\n";
			    die;
			}
		    }
		    
		    print FO " $cells[5]\n";
		    print FT "$cc"; 
		    print FT " $cells[0]";
		    print FT " $cells[1]\n";
		    $cc++;
		}
		close FI;
		close FO;
		close FT;
		# $cc is one past the last individual written
		my $nloc = $cc -1;
		die $! unless open FN, "> $bprefix.hg19.ch.fl.fam.n";
		print FN $nloc."\n";
		close FN;
		&mysystem ("mv $bprefix.hg19.ch.fl.fam.idnum.tmp $bprefix.hg19.ch.fl.fam.idnum");
	    }
	}
    }
}






###################################
### PREPDEPLOY
###################################


# Deploy mode: create the per-dataset deploy_preimp_*/deploy_postimp_*
# directories and queue one prepdep job for every dataset that has no
# ".dep.fini" marker in its preimp directory yet.
if ($deploy) {
    my @prepdep_arr = ();   # prepdep commands still to run
    my $prepdep_fini = 0;   # datasets whose marker file already exists

    unless (-e "$rootdir/prepdep_done") {
	foreach (@bimfli_files) {
	    my $bprefix = $_;
	    $bprefix =~ s/.bim$//;

	    my $subdir_preimp_deploy = "deploy_preimp_$bprefix";
	    unless (-e $subdir_preimp_deploy) {
		&mysystem ("mkdir $subdir_preimp_deploy");
	    }

	    my $subdir_postimp_deploy = "deploy_postimp_$bprefix";
	    unless (-e $subdir_postimp_deploy) {
		&mysystem ("mkdir $subdir_postimp_deploy");
	    }

	    # Marker file of a finished prepdep job.  It has its own name so it
	    # no longer shadows the $prepdep_fini counter declared above.
	    my $prepdep_fini_file = $subdir_preimp_deploy."/".$bprefix.".dep.fini";
	    my $bfile_aligned = $bprefix;
	    my $gw_legend_file = "$refdir/$suminfo_s_v2.impute.legend";

	    unless (-e $prepdep_fini_file) {
		my $systmp = "$prepdep_script --outdir $subdir_preimp_deploy --famfile $bprefix.fam.idnum --legend $gw_legend_file --bfile $bfile_aligned" ;
		push @prepdep_arr, $systmp ;
	    }
	    else {
		$prepdep_fini++;
	    }

	}

	if (@prepdep_arr > 0) {

	    $sjadir = $impute_dir;
	    $sjaname = "prepdep";
	    $sjatime = 2;
	    $sjamem = 2000;
	    @sjaarray = @prepdep_arr;

	    &send_jobarray;

	}
	else {
	    &mysystem ("touch $rootdir/prepdep_done");
	    print "prepare deploy done\n" if ($debug);
	}
    }
}

#exit;



#if ($suminfo_s_v2_sw) {
#    @reffiles = ();
#    die unless open IN, "< $refdir/infosum_pos.chunks";
#    while (my $line = <IN>) {
#	my @cells = &split_line($line);
	
#	my $refind = $cells[0]."_".$cells[1]."_".$cells[2];
#	push @reffiles, $refind;
#    }
#    close IN;

#}


#print "reffiles: @reffiles\n";
#exit;



#print "debug2\n";
#exit;


my @refind_arr_v2;     ## chunk ids (chrN_start_end)
my @refstart_arr_v2;   ## chunk start
my @refend_arr_v2;     ## chunk end

my @refchr_arr_v2;     ## saves the chromosomes in same order
my @gema_arr_v2;       ## saves the gema_files in same order

# v2 layout: the chunk list comes from $suminfo_c (fields: chromosome, start,
# end).  Chunks listed in %refiex are skipped.  The result replaces the
# legacy arrays so later steps use a single set of names.
if ($suminfo_s_v2_sw == 1) {

    open(IN, "< $refdir/$suminfo_c") or die $!." <$refdir/$suminfo_c>";
    while (my $line = <IN>) {
        my @cells = &split_line($line);
        my ($chr, $start, $end) = @cells[0..2];

        my $refind_loc = "chr${chr}_${start}_${end}";

        if (exists $refiex{$refind_loc}) {
            print "exclude chunk $refind_loc\n" if ($debug);
            next;
        }

        push @refind_arr_v2, $refind_loc;
        push @refstart_arr_v2, $start;
        push @refend_arr_v2, $end;
        push @refchr_arr_v2, $chr;

        my $gema_file = "$refdir/genetic_map_chr${chr}_combined_b37.txt";
        push @gema_arr_v2, $gema_file;

        ### test existence once per chromosome (not needed when deploying)
        if (!$deploy && !exists $gema_hash{$chr}) {
            $gema_hash{$chr} = 1;
            die "$gema_file not existent" unless (-e $gema_file);
        }
    }
    close IN;

    @refind_arr = @refind_arr_v2;
    @refstart_arr = @refstart_arr_v2;
    @refend_arr = @refend_arr_v2;
    @refchr_arr = @refchr_arr_v2;
    @gema_arr = @gema_arr_v2;
}


if ($onlyalign) {
    print "stopping after alignment\n";
    exit;
}

#print "debug2\n";
#exit;


############################################################
### chucking
#############################################################


# NOTE(review): the flag starts at 1 and is only ever set to 1, so the
# chucking section below never runs as written - confirm this is intended.
my $chucking_done = 1;
$chucking_done = 1 if (-e "$rootdir/chucking_done");


my @chuck_arr = ();    # chuck commands still to run
my $chuck_fini = 0;    # dataset/chunk pairs already finished
if ($chucking_done == 0) {
    ### loop for datasets
    foreach my $bim (@bimfli_files) {
        (my $bprefix = $bim) =~ s/.bim$//;

        my $subdir = "subbfile_$bprefix";
        my $subdir_empty = "empty_$bprefix";
        foreach my $dir ($subdir, $subdir_empty) {
            &mysystem ("mkdir $dir") unless (-e $dir);
        }

        # trio datasets get the extra --mendel option
        my $mendel_sw = (exists $trioset_bimfli{$bprefix}) ? "--mendel" : "";

        # .fini markers already present in the dataset's output directory
        opendir(DIR, $subdir) || die "can't opendir .: $!";
        my %finifiles_hash = map { ($_ => 1) } grep {/.fini$/} readdir(DIR);
        closedir DIR;

        #### loop for reference chunks
        foreach my $ccc (0..$#refind_arr) {

            my $refind = $refind_arr[$ccc];

            # v2 layout selects SNPs by chunk id, the legacy layout by the
            # chunk's info_pos file (which must exist)
            my $extract_txt;
            if ($suminfo_s_v2_sw == 1) {
                $extract_txt = "--refind $refind";
            }
            else {
                my $snps_extract = "$refdir/$reffiles[$ccc].info_pos";
                unless (-e $snps_extract) {
                    print "Error: $snps_extract is not existing\n";
                    exit;
                }
                $extract_txt = "--sfile $snps_extract";
            }

            if (exists $finifiles_hash{"plink.$bprefix.$refind.fini"}) {
                $chuck_fini++;
                next;
            }

            push @chuck_arr, "$chuck_script --buffer $buffer --out $subdir/plink.$bprefix.$refind --in $bprefix $extract_txt $mendel_sw --empty $subdir_empty/$bprefix.$refind";
        }
    }

###################################
### send chuck jobs
###################################

    if (@chuck_arr > 0) {
        $sjadir = $impute_dir;
        $sjaname = "chuck";
        $sjatime = 2;
        $sjamem = 2000;
        @sjaarray = @chuck_arr;

        &send_jobarray;
    }
    else {
        &mysystem ("touch $rootdir/chucking_done");
        print "chucking done\n" if ($debug);
    }
}
# die "debug_chuck";
#    print "debug\n";
#    exit;


############################################################
### prephase
#############################################################


# Prephasing is skipped when its marker file exists or when deploying.
my $prephase_done = (-e "$rootdir/prephase_done" || $deploy) ? 1 : 0;

my $spliha_n_2 = $spliha_n * 2;

# job lists filled by the prephase section below
my @preph_arr = ();
my $preph_fini = 0;
my @preph_arr_mu1 = ();
my @preph_arr_mu2 = ();
my @preph_arr_mu_v2 = ();

# extra command-line option, only set when $chr23male is true
my $chr23male_txt = $chr23male ? "--chr23male" : "";


if ($prephase_done == 0) {
    ### loop for datasets
    foreach (@bimfli_files) {
	my $bprefix = $_;
	$bprefix =~ s/.bim$//;

	my $subdir = "haps_$bprefix";
#	my $subdir_in = "subbfile_$bprefix";

	my $subdir_empty = "empty_$bprefix";
	unless (-e $subdir) {
	    &mysystem ("mkdir $subdir");
	}
	unless (-e $subdir_empty) {
	    &mysystem ("mkdir $subdir_empty");
	}

	my $trio_sw = 0;
	if (exists $trioset_bimfli{$bprefix}){
	    $trio_sw = 1 ;
	}

	print "read emptydir....\n" if ($debug);
	opendir(IND, $subdir_empty) || die "can't opendir .: $!";
	my @indirempty = grep {/\.empty$/} readdir(IND);
	closedir IND;
	my %indirempty_hash = ();
	foreach (@indirempty) {
	    $indirempty_hash{$_} = 1;
	}


	my @dirfiles = ();
	my @finifiles = ();
#	my @errorfiles = ();
	my @mufiles = ();
	my %finifiles_hash = ();
#	my %errorfiles_hash = ();

	print "read outdir....\n" if ($debug);
	opendir(DIR, $subdir) || die "can't opendir .: $!";
	@dirfiles = readdir(DIR);
	@finifiles = grep {/\.fini$/} @dirfiles;
#	@errorfiles = grep {/\.error$/} @dirfiles;
	@mufiles = grep {/\.multi\..$/} @dirfiles;
	closedir DIR;

	foreach (@finifiles) {
	    $finifiles_hash{$_} = 1;
	}
#	foreach (@errorfiles) {
#	    $errorfiles_hash{$_} = 1;
#	}

	foreach (@mufiles) {
	    $finifiles_hash{$_} = 1;
	}

	if ($trio_sw == 1) {
	    unless (-e "$bprefix.fam.shape"){
		print "create shapefile for trios\n" if ($debug);
		my $sys = "$trisha_script $bprefix.fam";
		&mysystem ($sys);
	    }
	}
#	else {
#	    print "$bprefix is not a trio\n";
#	    print "sleep\n";#
#	    sleep(10);
#
#	}

	my $mendel_sw = "";
	my $famfile = "$bprefix.fam.idnum";
	if ($trio_sw ==1) {
	    $mendel_sw = "--mendel";
	    $famfile = "$bprefix.fam.shape";
	}


	print "start refdirloop....\n" if ($debug);
	#### loop for reference chunks
	foreach my $ccc (0..$#refind_arr) {



	    my $refind = $refind_arr[$ccc];
	    my $chrind = $refchr_arr[$ccc];
	    my $rf = $reffiles[$ccc];
	    my $gema_file = $gema_arr[$ccc];


#	    my $bgz_file = $bgz_arr[$ccc];
	    my $bgz_file = "$refdir/$suminfo_s_v2.impute.bcf.bgz"; 
	    $bgz_file =~ s/XXX/$chrind/;
#	    push @bgz_arr, $bgz_file;
	    print $bgz_file."\n" if ($debug);

	    my $extract_txt = "";
	    
	    if ($suminfo_s_v2_sw ==1) {
		$extract_txt = "--refind $refind";
	    }
	    else {
		my $snps_extract = "$refdir/$rf.info_pos";
		unless (-e $snps_extract) {
		    print "Error: $snps_extract is not existing\n";
		    exit;
		}
		$extract_txt = "--sfile $snps_extract";
	    }


#	    print "bgz_file: $bgz_file\n";
#	    print "chrind: $chrind\n";
#	    print "$gema_file\n";
#	    print "$rf\n";

	    
	    my $multi_txt = "";
	    my $multi_sw = 0;
	    
#	    if ($suminfo_s_v2_sw ==1) {
#		$refind = $chrind;
#		$multi_txt = "--multi $multithread_v2";
#		$multi_sw = 3;
#	    }


	    
#	    print "$gema_file\n";
#	    exit;

#	    my $plink_in = "$bprefix";
	    my $empty_ch_out = "$subdir_empty/$bprefix.$refind.empty";
	    my $haps_ch_out = "$subdir/plink.$bprefix.$refind";
	    my $haps_ch_out_fini = "plink.$bprefix.$refind.fini";
	    my $haps_ch_out_mu0 = "plink.$bprefix.$refind.multi.0";
	    my $haps_ch_out_mu1 = "plink.$bprefix.$refind.multi.$multithread1";


#	    print "famfile: $famfile\n";
	    my $xtxt = "";
	    if ($phas == 923 || $phas == 9123 || $phas == 91231) {
		$xtxt = "--chrX";
	    }
	    
	    if ($chr23trios) {
		$xtxt = "--chrX";
	    }
	    

	    if (exists $finifiles_hash{$haps_ch_out_mu0} ) {
		$multi_txt = "--multi $multithread1";
		$multi_sw = 1;
	    }
	    if (exists $finifiles_hash{$haps_ch_out_mu1} ) {
		$multi_txt = "--multi $multithread2";
		$multi_sw = 2;
	    }


	    if (exists $indirempty_hash{$empty_ch_out} ) {
#		print "really existing: \n"
#		print "$bprefix.$refind.empty\n";
	    }
	    else {
		
		unless (exists $finifiles_hash{$haps_ch_out_fini} ) {

#		    print "didnt find this one: $haps_ch_out_fini\n";
#		    sleep (3);
		    
#		if (exists $finifiles_hash{$haps_ch_out_fini} ) {
#
#		}
		    ################################## work on this here!!!!!


		    my $backbone = "$preph_script $chr23male_txt $xtxt --spliha $spliha_n_2 --out $haps_ch_out --in $bprefix --fam $famfile --gema $gema_file --buffer $buffer $extract_txt $mendel_sw --empty $empty_ch_out";
		    
		    if ($eagle_sw) { 
			$backbone .= " --eagle";
		    }
		    
		    if ($mm3_sw) { 

			$backbone .= " --eaglvcf";
			$backbone .= " --ref $bgz_file";

			if ($multithread_v2 > 0) {
			    $backbone .= " --multi $multithread_v2";
#			    $sjamulti = $multithread_v2;
			}

			if ($noref) {
			    $backbone .= " --noref";
			}


		    }

#		    elsif ($imp4_sw) {
#			$backbone .= " --shapeit3";
#		    }

		    
		    if ($suminfo_s_v2_sw == 1) {
			$backbone .= " --nospli";
#			print "v2 found\n";
#			print "$multi_sw\n";
		    }

#		    print "$backbone\n";
#		    exit;


#		    print "$multi_sw\n";
#		    exit;

### multi_sw is outdated, not used right now...
#		    if ($multi_sw == 0) {
			push @preph_arr, $backbone;
#		    }


		    if (0) {
			
			#		    if ($multi_sw == 1) {
			#		    	push @preph_arr_mu1, $backbone." $multi_txt";
			#		    }
			#		    if ($multi_sw == 3) {
			#		    	push @preph_arr_mu_v2, $backbone." $multi_txt";
			#		    }



			
			#		    print "multi: $multi_sw\n";
			if ($multi_sw == 2) {
			    my $lastlog = `tail -1 $haps_ch_out.shape.log`;

			    #			my $bimn = `wc -l $plink_ch_out.bim`;
			    #			$bimn = $bimn * 1;
			    #			print "$lastlog: $bimn\n";
			    #			if ($lastlog =~ /fully missing individuals/) {
			    #			    if ($bimn < 20) {
			    #				print "setting empty: $bprefix.$refind.empty\n";
			    #				&mysystem ("touch $subdir_empty/$bprefix.$refind.empty");
			    #			    }
			    #			    else {
			    #				print "Warning setting empty: $bprefix.$refind.empty\n";
			    #				&mysystem ("touch $subdir_empty/$bprefix.$refind.empty");
			    #				&mysystem ("touch $subdir_empty/$bprefix.$refind.empty.error.nsnps");
			    #			    }
			    #			}
			    #			else {
			    push @preph_arr_mu2, $backbone." $multi_txt";
			    #			}
			}
		    }

		    
#		    print "$backbone\n";
#		    exit;
		}
		else {
#		    print "found this one: $haps_ch_out_fini\n";
		    $preph_fini++;
		}
	    }
	}
    }
    
    
    
    #    print "debug\n";
    #    exit;
    
    ###################################
    ### send prephase jobs
    ###################################
    
    if (@preph_arr > 0) {
	
	$sjadir = $impute_dir;
	$sjaname = "preph";
	$sjatime = 2;
	#	$sjatime = 4 if ($preph_fini > 0);
	$sjamem = 2000;
	if ($multithread_v2 > 0) {
	    $sjamulti = $multithread_v2;
	    $sjamem = 4000;
	    $sjatime = 4;
	}
	if ($phaselong) {
	    $sjatime = 24;
	    $sjamem = 4000;
	}
	
	@sjaarray = @preph_arr;
	
	&send_jobarray;
	
    }
    

    
    
#    print "debug2\n";
#    exit;



    if (0) {
	if (@preph_arr_mu1 > 0) {
	    
	    $sjadir = $impute_dir;
	    $sjaname = "preph_mu1";
	    $sjatime = 2;
	    unless ($qloc eq "qsub_b") {
		$sjamulti = $multithread1;
	    }
	    $sjamem = 16000;
	    $sjacores = 4;
	    @sjaarray = @preph_arr_mu1;

	    &send_jobarray;

	}

	if (@preph_arr_mu_v2 > 0) {

	    $sjadir = $impute_dir;
	    $sjaname = "preph_mu_v2";
	    $sjatime = 2;
	    $sjamulti = $multithread_v2;

	    if ($preph_fini > 0) {
		$sjaweek = 1;
	    }
	    $sjamem = 12000;
	    $sjacores = 4;
	    @sjaarray = @preph_arr_mu_v2;

	    &send_jobarray;

	}
    }

#    print "full stop after preph_v2\n";
#    exit;

    
    if (@preph_arr_mu2 > 0) {

	# no multithread on broad makes another resub pointless, so fail here
	if ($qloc eq "qsub_b") {

	    $sjaname = "preph_mu2";
	    if (@preph_arr_mu2 < 10) {

		$sjadir = $impute_dir;
		$sjatime = 30;
		$sjamem = 16000;
		@sjaarray = @preph_arr_mu2;
		
		&send_jobarray;
	    }
	    else {
		    
		my $err_message ;
		$err_message .= "##################################################################\n";
		$err_message .= "##### Error: \n";
		$err_message .= "##### step $sjaname has been done repeatedly without any progress\n";
		$err_message .= "##### imputation pipeline stopped: $command_line\n";
		$err_message .= "##### $sjainfotxt\n";
		$err_message .= "##### if reason does not appear obvious\n";
		$err_message .= "##### have a look at the wiki page\n"; 
		$err_message .= "##### https://sites.google.com/a/broadinstitute.org/ricopili/\n";
		$err_message .= "##### or contact the developers\n";
		$err_message .= "##################################################################\n";
		print "$err_message\n";
		
		die $! unless open ERR, "> error_file";
		print ERR $err_message."\n";
		close ERR;

		if ($noti == 1) {
		    
		    &mysystem ('cat error_file | '.$mutt_script.' -s RP_pipeline_error '.$email) ;

		}
		exit;
	    }
	} else {

	    $sjadir = $impute_dir;
	    $sjaname = "preph_mu2";
	    $sjatime = 4;
	    $sjaweek = 1;
	    $sjamulti = $multithread2;
	    $sjamem = 4000;
	    @sjaarray = @preph_arr_mu2;

	    &send_jobarray;

	}
    }
    else {
#	print "debug\n";
#	exit;
	&mysystem ("touch $rootdir/prephase_done");
	print "prephasing done\n" if ($debug);
    }
}
#print "debug\n";
#exit;


############################################################
### write empty file
#############################################################

# Collect the names of all "*.empty" marker files of all datasets into
# $rootdir/empty_info (one file name per line).  The list is written to a
# temporary file first and only moved into place once it is complete, so an
# existing empty_info is never a half-written one.
unless (-e "$rootdir/empty_info") {

    my $empty_tmp = "empty_info.tmp";
    open(my $em_fh, '>', $empty_tmp) || die "can't write $empty_tmp: $!";

    foreach my $bimfile (@bimfli_files) {
        (my $bprefix = $bimfile) =~ s/\.bim$//;
        my $subdir_empty = "empty_$bprefix";

        # datasets without an empty-directory contribute nothing
        next unless (-e $subdir_empty);

        opendir(my $empty_dh, $subdir_empty) || die "can't opendir $subdir_empty: $!";
        my @indirempty = grep {/\.empty$/} readdir($empty_dh);
        closedir $empty_dh;

        foreach my $empty_name (@indirempty) {
            print {$em_fh} "$empty_name\n";
        }
    }

    # buffered write errors only show up at close; do not publish a broken list
    close($em_fh) || die "can't close $empty_tmp: $!";

    &mysystem ("mv $empty_tmp $rootdir/empty_info");
}




############################################################
### pseudo
#############################################################


# Pseudo stage.
#
# Only datasets listed in %trioset_bimfli are handled.  For each of their
# non-empty reference chunks and each split 1..N one $pseudo_script command
# is queued, unless the matching "...trio.haps.fini" marker is already
# present in haps_<dataset>.  The stage is skipped when
# $rootdir/pseudo_done exists or $deploy is set.
my $pseudo_done = (-e "$rootdir/pseudo_done" || $deploy) ? 1 : 0;

my @pseudo_arr = ();
if ($pseudo_done == 0) {

    # same switch for every dataset, chunk and split
    my $chrX_txt = "";
    $chrX_txt = "--chrX" if ($phas == 923 || $phas == 9123 || $phas == 91231);
    $chrX_txt = "--chrX" if ($chr23trios);

    ### loop for datasets
    foreach my $bimfile (@bimfli_files) {

        (my $bprefix = $bimfile) =~ s/.bim$//;

        my $subdir_in = "haps_$bprefix";
        my $subdir_empty = "empty_$bprefix";

        # datasets that are not registered as trio sets are skipped
        next unless (exists $trioset_bimfli{$bprefix});

        # number of splits: family count divided by the split size, plus one
        my $nfam = `cat $bprefix.fam.n`;
        chomp($nfam);
        my $splitn = int ($nfam / $spliha_n) + 1;

        print "read emptydir....\n" if ($debug);
        opendir(my $empty_dh, $subdir_empty) || die "can't opendir .: $!";
        my %indirempty_hash = map { $_ => 1 } grep {/\.empty$/} readdir($empty_dh);
        closedir $empty_dh;

        print "read outdir....\n" if ($debug);
        opendir(my $haps_dh, $subdir_in) || die "can't opendir .: $!";
        my %finifiles_hash = map { $_ => 1 } grep {/\.fini$/} readdir($haps_dh);
        closedir $haps_dh;

        print "start pseudo refdirloop....\n" if ($debug);
        #### loop for reference chunks
        foreach my $ccc (0..$#refind_arr) {

            my $refind = $refind_arr[$ccc];
            my $haps_ch_out = "$subdir_in/plink.$bprefix.$refind";
            my $empty_ch_out = "$bprefix.$refind.empty";

            # chunk flagged as empty: report it in debug mode and move on
            if (exists $indirempty_hash{$empty_ch_out}) {
                print "$bprefix.$refind.empty\n" if ($debug);
                next;
            }

            # input files are the same for every split of this chunk
            my $haps_file_loc = "$haps_ch_out.haps";
            my $haps_file_loc_sample = "$haps_ch_out.sample";

            foreach my $spn (1..$splitn) {
                my $pseudo_ch_out_fini = "plink.$bprefix.$refind.haps.spli$spn.trio.haps.fini";
                next if (exists $finifiles_hash{$pseudo_ch_out_fini});

                my $pseudo_ch_out = "$haps_ch_out.haps.spli$spn.trio";
                push @pseudo_arr, "$pseudo_script $chrX_txt --out $pseudo_ch_out $haps_file_loc $haps_file_loc_sample";
            }
        }
    }


    ###################################
    ### send pseudo jobs
    ###################################

    if (@pseudo_arr) {
        $sjadir = $impute_dir;
        $sjaname = "pseudo";
        $sjatime = 1;
        $sjamem = 1000;
        @sjaarray = @pseudo_arr;

        &send_jobarray;
    }
    else {
        # nothing left to submit: flag the stage as finished
        &mysystem ("touch $rootdir/pseudo_done");
        print "pseudo done\n" if ($debug);
    }

}


#exit;


############################################################
### impute
#############################################################




# Imputation stage.
#
# For every dataset and every non-empty reference chunk an $imp_script
# command is queued unless its ".fini" marker is present in pi_<dataset>.
# With $suminfo_s_v2_sw == 1 there is one job per chunk (plus, with $mm3_sw,
# one $vcf2dos_script job per chunk); otherwise there is one job per chunk
# and split.  The stage is skipped when $rootdir/imp_done exists or $deploy
# is set.
my $imp_done = 0;
if (-e "$rootdir/imp_done" || $deploy) {
    $imp_done = 1;
}

my $n_imp_done = 0;
my $n_vcf2dos_done = 0;
my @imp_arr = ();
my @vcf2dos_arr = ();
my $imp_fini = 0;
my $vcf2dos_fini = 0;
if ($imp_done == 0) {

    # set when $phas holds one of the chrX codes
    my $chrX_phas = ($phas == 923 || $phas == 9123 || $phas == 91231) ? 1 : 0;

    ### loop for datasets
    foreach my $bimfile (@bimfli_files) {

        (my $bprefix = $bimfile) =~ s/\.bim$//;

        my $subdir = "pi_$bprefix";
        my $subdir_in = "haps_$bprefix";
        my $subdir_empty = "empty_$bprefix";

        unless (-e $subdir) {
            &mysystem ("mkdir $subdir");
        }

        my $trio_sw = (exists $trioset_bimfli{$bprefix}) ? 1 : 0;

        # number of splits: family count divided by the split size, plus one
        my $nfam = `cat $bprefix.fam.n`;
        chomp($nfam);
        my $splitn = int ($nfam / $spliha_n) + 1;

        print "read emptydir....\n" if ($debug);
        opendir(my $empty_dh, $subdir_empty) || die "can't opendir $subdir_empty: $!";
        my %indirempty_hash = map { $_ => 1 } grep {/\.empty$/} readdir($empty_dh);
        closedir $empty_dh;

        print "read outdir....\n" if ($debug);
        opendir(my $out_dh, $subdir) || die "can't opendir $subdir: $!";
        my @dirfiles = readdir($out_dh);
        closedir $out_dh;

        my %finifiles_hash = map { $_ => 1 } grep {/\.fini$/} @dirfiles;
        my %sumfiles_hash = map { $_ => 1 } grep {/\_summary$/} @dirfiles;

        print "start imp refdirloop....\n" if ($debug);
        #### loop for reference chunks
        foreach my $ccc (0..$#refind_arr) {

            my $refind = $refind_arr[$ccc];
            my $refchr = $refchr_arr[$ccc];
            my $refstart = $refstart_arr[$ccc];
            my $refend = $refend_arr[$ccc];
            my $gema_file = $gema_arr[$ccc];

            my $rf = $reffiles[$ccc];
            my $refchunk = "$refdir/$rf";
            my $legend_file = "$refdir/$rf";

            my $haps_ch_out = "$subdir_in/plink.$bprefix.$refind";

            if ($suminfo_s_v2_sw == 1) {
                # per-chromosome reference: XXX in the template is the chromosome
                $refchunk = "$refdir/$suminfo_s_v2.impute";
                $refchunk =~ s/XXX/$refchr/;
                $legend_file = "$refchunk.legend.gz";
            }

            my $empty_ch_out = "$bprefix.$refind.empty";

            # chunk flagged as empty: report it in debug mode and move on
            if (exists $indirempty_hash{$empty_ch_out}) {
                print "$bprefix.$refind.empty\n" if ($debug);
                next;
            }

            if ($suminfo_s_v2_sw == 1) {

                my $haps_file_loc = "$haps_ch_out.haps";
                my $haps_file_loc_sample = "$haps_ch_out.sample";

                if ($trio_sw) {
                    $haps_file_loc = "$haps_ch_out.haps.spli1.trio.haps";
                    $haps_file_loc_sample = "$haps_ch_out.haps.spli1.trio.sample.i4";
                }

                my $pi_ch_out = "$subdir/plink.$bprefix.$refind.imp";
                my $pi_ch_out_fini = "plink.$bprefix.$refind.imp.fini";
                # name of the marker file; deliberately NOT called
                # $vcf2dos_fini, which is the counter declared above
                my $vcf2dos_fini_file = "plink.$bprefix.$refind.imp.GP.gz.fini";
                my $pi_ch_out_summary = "plink.$bprefix.$refind.imp_summary";

                my $xtxt = "";
                if ($chrX_phas || $chr23trios) {
                    $xtxt = "--chrX $haps_file_loc_sample";
                }

                unless (exists $finifiles_hash{$pi_ch_out_fini}) {

                    # an existing summary reporting "no type 2 SNPs" means
                    # the chunk is treated as empty
                    my $empty = 0;
                    if (exists $sumfiles_hash{$pi_ch_out_summary}) {
                        my @errlog = `grep ERROR $subdir/$pi_ch_out_summary`;
                        if (@errlog && $errlog[0] =~ /^ERROR: There are no type 2 SNPs/) {
                            $empty = 1;
                            print "empty: $pi_ch_out_summary\n" if ($debug);
                        }
                    }

                    ## couple different options with Minimac3
                    my $mm3_txt = "";
                    my $chr_txt = "";
                    if ($mm3_sw) {
                        $haps_file_loc =~ s/haps$/vcf.gz/;
                        $refchunk .= ".m3vcf.gz";
                        $mm3_txt = "--minimac";
                        $chr_txt = "--chrind $refchr";
                    }
                    elsif ($imp4_sw) {
                        $mm3_txt = "--imp4";
                    }

                    if ($empty) {
                        &mysystem ("touch $subdir_empty/$bprefix.$refind.empty");
                    }
                    else {
                        ##
                        ## multithread is not very efficient according to gonzalo,
                        ## so no --multi is added here
                        ##
                        push @imp_arr, "$imp_script $xtxt --out $pi_ch_out --in $haps_file_loc --refstart $refstart --refend $refend  --reffile $refchunk --buff $buffer --gema $gema_file $mm3_txt $chr_txt";
                    }
                }
                else {
                    $imp_fini++;
                    $n_imp_done++;
                }

                ####################################################
                #### rewrite stuff if imputed with minimac3
                ####################################################
                if ($mm3_sw) {

                    unless (exists $finifiles_hash{$vcf2dos_fini_file}) {

                        my $vcf_file = $pi_ch_out.".dose.vcf.gz";

                        # NOTE(review): a "--keepvcf" text used to be prepared
                        # here for $noclean but was never added to the command;
                        # confirm whether vcf2dos should receive it.
                        push @vcf2dos_arr, "$vcf2dos_script --legend $legend_file --vcf $vcf_file --refstart $refstart --refend $refend --chr $refchr";
                    }
                    else {
                        $vcf2dos_fini++;
                        $n_vcf2dos_done++;
                    }
                }
            }
            else {

                foreach my $spn (1..$splitn) {

                    my $haps_file_loc = "$haps_ch_out.haps.spli$spn";
                    my $haps_file_loc_sample = "$haps_ch_out.haps.spli$spn.sample";

                    if ($trio_sw) {
                        $haps_file_loc = "$haps_ch_out.haps.spli$spn.trio.haps";
                        $haps_file_loc_sample = "$haps_ch_out.haps.spli$spn.trio.sample";
                    }

                    my $pi_ch_out = "$subdir/plink.$bprefix.$refind.haps.spli$spn";
                    my $pi_ch_out_fini = "plink.$bprefix.$refind.haps.spli$spn.fini";

                    my $xtxt = "";
                    if ($chrX_phas) {
                        $xtxt = "--chrX $haps_file_loc_sample";
                    }

                    unless (exists $finifiles_hash{$pi_ch_out_fini}) {
                        push @imp_arr, "$imp_script $xtxt --out $pi_ch_out --in $haps_file_loc --refstart $refstart --refend $refend  --reffile $refchunk  --gema $gema_file";
                    }
                    else {
                        $imp_fini++;
                        $n_imp_done++;
                    }
                }
            }
        }
    }


    ###################################
    ### send impute jobs
    ###################################

    if (@imp_arr > 0) {

        $sjadir = $impute_dir;
        $sjaname = "imp";
        $sjatime = 2;
        $sjamem = 2000;

        if ($minilong) {
            $sjatime = 24;
            $sjamem = 4000;
        }

        # extra walltime on top of the defaults above
        $sjatime += $impwallinc;

        @sjaarray = @imp_arr;

        &send_jobarray;

    }


    ###################################
    ### send vcf2dos jobs (only for minimac3)
    ###################################

    if (@vcf2dos_arr > 0) {

        $sjadir = $impute_dir;
        $sjaname = "vcf2dos";
        $sjatime = 2;
        $sjamem = 4000;

        if ($minilong) {
            $sjatime = 4;
            $sjamem = 6000;
        }

        @sjaarray = @vcf2dos_arr;

        &send_jobarray;

    }


    # NOTE(review): the marker is written without checking the job arrays
    # above; presumably send_jobarray does not return when it submits jobs
    # - confirm.
    &mysystem ("touch $rootdir/imp_done");
    print "imp done\n" if ($debug);

}



############################################################
### deployed impute
#############################################################




# Deployed imputation stage (runs only with $deploy).
#
# The imputed VCF files are expected in deploy_postimp_<dataset>.  For every
# dataset and reference chunk a $vcf2dos_script command is queued unless
# the "plink.<dataset>.<chunk>.imp.GP.gz.fini" marker is present in
# pi_<dataset>.  The stage is skipped when $deploy is not set or
# $rootdir/impdeploy_done exists.
my $impdeploy_done = 0;
unless ($deploy) {
    $impdeploy_done = 1;
}
if (-e "$rootdir/impdeploy_done") {
    $impdeploy_done = 1;
}


my $n_vcf2dos_deploy_done = 0;
my @vcf2dos_deploy_arr = ();
my $vcf2dos_deploy_fini = 0;


if ($impdeploy_done == 0) {

    ### loop for datasets
    foreach my $bimfile (@bimfli_files) {

        (my $bprefix = $bimfile) =~ s/\.bim$//;

        my $subdir = "pi_$bprefix";
        my $subdir_in = "deploy_postimp_$bprefix";
        my $subdir_empty = "empty_$bprefix";

        unless (-e $subdir) {
            &mysystem ("mkdir $subdir");
        }

        print "read outdir....\n" if ($debug);
        opendir(my $out_dh, $subdir) || die "can't opendir $subdir: $!";
        my %finifiles_hash = map { $_ => 1 } grep {/\.fini$/} readdir($out_dh);
        closedir $out_dh;

        print "start vcf2dos_deploy refdirloop....\n" if ($debug);

        ## not needed here, but maybe later, usually done in prephasing step
        unless (-e $subdir_empty) {
            &mysystem ("mkdir $subdir_empty");
        }

        #### loop for reference chunks
        foreach my $ccc (0..$#refind_arr) {

            my $refind = $refind_arr[$ccc];
            my $refchr = $refchr_arr[$ccc];
            my $refstart = $refstart_arr[$ccc];
            my $refend = $refend_arr[$ccc];

            # per-chromosome reference: XXX in the template is the chromosome
            my $refchunk = "$refdir/$suminfo_s_v2.impute";
            $refchunk =~ s/XXX/$refchr/;
            my $legend_file = "$refchunk.legend.gz";

            my $outname = "$subdir/plink.$bprefix.$refind";

            # name of the marker file; deliberately NOT called
            # $vcf2dos_deploy_fini, which is the counter declared above
            my $deploy_fini_file = "plink.$bprefix.$refind.imp.GP.gz.fini";

            ####################################################
            #### vcf2dos for deployed imputation
            ####################################################

            if (exists $finifiles_hash{$deploy_fini_file}) {
                print "this is existing: $deploy_fini_file\n" if ($debug);
                $vcf2dos_deploy_fini++;
                $n_vcf2dos_deploy_done++;
                next;
            }

            my $vcf_file = "$subdir_in/$refchr.vcf.gz"; ## sanger format

            unless (-e $vcf_file) {
                $vcf_file = "$subdir_in/chr$refchr.dose.vcf.gz"; ## michigan format
                unless (-e $vcf_file) {
                    print "------------------------------------------------------------------------------------------------------------\n";
                    print "Error: $vcf_file not found\n";
                    print "has this dataset been already imputed on the imputation server, received back and copied to the right place?\n";
                    print "------------------------------------------------------------------------------------------------------------\n";
                    exit;
                }
            }

            # NOTE(review): a "--keepvcf" text used to be prepared here for
            # $noclean but was never added to the command; confirm whether
            # vcf2dos should receive it.
            push @vcf2dos_deploy_arr, "$vcf2dos_script --legend $legend_file --vcf $vcf_file --refstart $refstart --refend $refend --chr $refchr --outname $outname";
        }
    }

    ###################################
    ### send vcf2dos deploy jobs
    ###################################

    if (@vcf2dos_deploy_arr > 0) {

        $sjadir = $impute_dir;
        $sjaname = "vcf2dos_deploy";
        $sjatime = 2;
        $sjamem = 4000;

        if ($minilong) {
            $sjatime = 4;
            $sjamem = 6000;
        }

        @sjaarray = @vcf2dos_deploy_arr;

        &send_jobarray;

    }


    # NOTE(review): the marker is written without checking the job array
    # above; presumably send_jobarray does not return when it submits jobs
    # - confirm.
    &mysystem ("touch $rootdir/impdeploy_done");
    print "impdeploy done\n" if ($debug);

}

    

#if ($deploy) {
#    print "debug3\n";
#    exit;
#}

############################################################
### dosing
#############################################################


# Dosing stage.
#
# For every dataset and every non-empty reference chunk one $dos_script
# command is queued unless "dos_<dataset>.<chunk>.out.dosage.fini" is
# already present in the dataset's dasu directory.  The stage is skipped
# when $rootdir/dos_done exists.
my $dos_done = (-e "$rootdir/dos_done") ? 1 : 0;

my $dasudir = "$rootdir/dasu";

my @dos_arr = ();
my $dos_fini = 0;
if ($dos_done == 0) {

    ### loop for datasets
    foreach my $bimfile (@bimfli_files) {

        (my $bprefix = $bimfile) =~ s/.bim$//;

        my $dasudir_loc = "${dasudir}_$bprefix";
        &mysystem("mkdir $dasudir_loc") unless (-e $dasudir_loc);

        my $subdir_in = "pi_$bprefix";
        my $subdir_empty = "empty_$bprefix";

        # for trio sets the prefix is whatever precedes the first '*'
        # on the first line of the fam file
        my $trio_sw = 0;
        my $trio_prefix = "";
        if (exists $trioset_bimfli{$bprefix}) {
            $trio_sw = 1;

            open(my $fam_fh, "< $bprefix.fam") || die $!."$bprefix.fam";
            my $line = <$fam_fh>;
            close $fam_fh;

            $trio_prefix = (split '\*', $line)[0];
        }

        # number of splits: family count divided by the split size, plus one
        my $nfam = `cat $bprefix.fam.n`;
        chomp($nfam);
        my $splitn = int ($nfam / $spliha_n) + 1;

        print "read emptydir....\n"  if ($debug);
        opendir(my $empty_dh, $subdir_empty) || die "can't opendir .: $!";
        my %indirempty_hash = map { $_ => 1 } grep {/\.empty$/} readdir($empty_dh);
        closedir $empty_dh;

        print "read outdir....\n"  if ($debug);
        opendir(my $dasu_dh, $dasudir_loc) || die "can't opendir .: $!";
        my %finifiles_hash = map { $_ => 1 } grep {/\.out.dosage.fini$/} readdir($dasu_dh);
        closedir $dasu_dh;

        print "start dos refdirloop....\n"  if ($debug);
        #### loop for reference chunks
        foreach my $ccc (0..$#refind_arr) {

            my $refind = $refind_arr[$ccc];
            my $refchr = $refchr_arr[$ccc];
            my $empty_ch_out = "$bprefix.$refind.empty";

            # chunk flagged as empty: report it in debug mode and move on
            if (exists $indirempty_hash{$empty_ch_out}) {
                print "$bprefix.$refind.empty\n"  if ($debug);
                next;
            }

            my $dos_ch_out = "dos_$bprefix.$refind";
            my $dos_ch_out_fini = "dos_$bprefix.$refind.out.dosage.fini";

            my $plink_ch_out = "subbfile_$bprefix/plink.$bprefix.$refind";
            my $famname_loc = "$plink_ch_out.fam";
            my $prefix_loc = "";
            my $puter_out_arr;

            if ($suminfo_s_v2_sw == 1) {
                # one imputation output per chunk, bim/fam of the whole dataset
                $plink_ch_out = $bprefix;
                $famname_loc = "$plink_ch_out.fam";

                if ($trio_sw == 1) {
                    $famname_loc = "haps_$bprefix/plink.$bprefix.$refind.haps.spli1.trio.fam";
                    $prefix_loc = "--prefix $trio_prefix --nodosfam";
                }

                $puter_out_arr = ($mm3_sw || $deploy)
                    ? "$subdir_in/plink.$bprefix.$refind.imp.GP.gz"
                    : "$subdir_in/plink.$bprefix.$refind.imp.gz";
            }
            else {
                # one imputation output per split; each name carries a leading blank
                $puter_out_arr = join "", map { " $subdir_in/plink.$bprefix.$refind.haps.spli$_.gz" } (1..$splitn);
            }

            # already dosed: count it and move on
            if (exists $finifiles_hash{$dos_ch_out_fini}) {
                $dos_fini++;
                next;
            }

            # stays empty; kept so the command text is unchanged
            my $deploy_txt = "";
            push @dos_arr, "$dos_script $deploy_txt $prefix_loc --outname $dos_ch_out --outdir $dasudir_loc  --chr $refchr --fam $famname_loc --bim $plink_ch_out.bim $puter_out_arr";
        }
    }


    ###################################
    ### send dos jobs
    ###################################

    if (@dos_arr) {
        $sjadir = $impute_dir;
        $sjaname = "dos";
        $sjatime = $minilong ? 24 : 2;
        $sjamem = 2000;
        $sjamaxpar = 100;
        @sjaarray = @dos_arr;

        &send_jobarray;
    }
    else {
        # nothing left to submit: flag the stage as finished
        &mysystem ("touch $rootdir/dos_done");
        print "dos done\n"  if ($debug);
    }
}



########################################
## clean pi_sub
#############################################

# Cleaning stage, run from $rootdir.
#
# Unless --noclean is active, one cleaning command is queued for each
# existing pi_<dataset> / haps_<dataset> directory and for the errandout
# directory, as long as the directory holds no "cleaned" marker.  When
# nothing is queued, $rootdir/clean_pisub_done is written.  Afterwards the
# working directory is switched to $impute_dir.
chdir ($rootdir);

if ($noclean) {
    print "####################################################################################################\n";
    print "--noclean was switched on, please be aware that a lot of intermedieate files are kept right now\n";
    print "####################################################################################################\n";
}
else {

    print "start cleaning\n"  if ($debug);
    my @clean_arr_pisub;

    unless (-e "$rootdir/clean_pisub_done") {

        my $imp_root = "$rootdir/$impute_dir";

        foreach my $bimfile (@bimfli_files) {
            (my $bprefix = $bimfile) =~ s/.bim$//;

            # imputation output first, then the haplotype directory
            foreach my $dir_loc ("$imp_root/pi_$bprefix", "$imp_root/haps_$bprefix") {
                next unless (-e "$dir_loc");
                next if (-e "$dir_loc/cleaned");
                push @clean_arr_pisub, "$cleandir_script $dir_loc";
            }
        }

        my $pi_eo_loc = "$imp_root/errandout";
        if (-e "$pi_eo_loc" && !(-e "$pi_eo_loc/cleaned")) {
            push @clean_arr_pisub, "$cleanerrandout_script $pi_eo_loc";
        }


        ###################################
        ### send clean jobs
        ###################################

        if (@clean_arr_pisub) {
            $sjadir = $rootdir;
            $sjaname = "clean_pisub";
            $sjatime = 2;
            $sjamem = 1000;
            $sjamaxpar = 100;
            @sjaarray = @clean_arr_pisub;

            &send_jobarray;
        }
        else {
            # nothing left to clean: flag the stage as finished
            &mysystem ("touch $rootdir/clean_pisub_done");
            print "cleaning pisub done\n"  if ($debug);
        }

    }
}

chdir ($impute_dir);




#print "debug after dos\n";
#exit;

############################################################
### best guess
#############################################################


# Best-guess stage ("dabg").
# Unless $rootdir/dabg_done exists, queue one $dabg_script call for every
# (dataset, reference chunk) pair that is neither flagged as empty
# (empty_<dataset>/<dataset>.<chunk>.empty) nor already finished
# (<dasudir>qc1_<dataset>/qc1/dos_<dataset>.<chunk>.out.dosage.gz.fini).
# With nothing left to queue the dabg_done marker is written.
# Relative paths are resolved against the current working directory.
my $dabg_done = (-e "$rootdir/dabg_done") ? 1 : 0;


my @dabg_arr = ();
my $dabg_fini = 0;      # number of chunks that already have a .fini marker
if ($dabg_done == 0) {
    ### loop for datasets
    foreach my $bimfile (@bimfli_files) {
        my $bprefix = $bimfile;
        $bprefix =~ s/.bim$//;

        my $dasudir_loc = "$dasudir"."_$bprefix";
        my $dasudir_loc_qc1 = "$dasudir"."qc1_$bprefix";
        unless (-e "$dasudir_loc_qc1/qc1") {
            &mysystem("mkdir -p $dasudir_loc_qc1/qc1");
        }

        my $subdir_empty = "empty_$bprefix";

        # Both directories are listed once and turned into lookup hashes,
        # so the chunk loop below needs no per-chunk file test.
        print "read emptydir....\n"  if ($debug);
        opendir(my $empty_dh, $subdir_empty)
            or die "can't opendir $subdir_empty: $!";
        my @indirempty = grep {/\.empty$/} readdir($empty_dh);
        closedir($empty_dh);
        my %indirempty_hash = map { $_ => 1 } @indirempty;

        print "read outdir....\n"  if ($debug);
        opendir(my $qc1_dh, "$dasudir_loc_qc1/qc1")
            or die "can't opendir $dasudir_loc_qc1/qc1: $!";
        my @finifiles = grep {/\.out.dosage.gz.fini$/} readdir($qc1_dh);
        closedir($qc1_dh);
        my %finifiles_hash = map { $_ => 1 } @finifiles;

        #### loop for reference chunks
        foreach my $refind (@refind_arr) {

            my $dabgname = "$bprefix.$refind";
            my $dasuqc1_fini = "dos_$bprefix.$refind.out.dosage.gz.fini";
            my $empty_ch_out = "$bprefix.$refind.empty";

            if (exists $indirempty_hash{$empty_ch_out}) {
                # chunk flagged as empty: only report it
                print "$empty_ch_out\n"  if ($debug);
                next;
            }

            if (exists $finifiles_hash{$dasuqc1_fini}) {
                $dabg_fini++;
                next;
            }

            my $options_tmp = "--info_th $info_th --freq_th $freq_th --bg_th $bg_th";
            my $sys_loc = "$dabg_script $options_tmp --indir $dasudir_loc --outdir $dasudir_loc_qc1 $dabgname";
            push @dabg_arr, $sys_loc;
        }
    }

###################################
### send dabg jobs
###################################

    if (@dabg_arr > 0) {

        $sjadir = $impute_dir;
        $sjaname = "dabg";
        $sjatime = 2;

        if ($minilong) {
            $sjatime = 24;
        }

        $sjamem = 1000;
        $sjamaxpar = 100;
        @sjaarray = @dabg_arr;

        &send_jobarray;

    }
    else {
        &mysystem ("touch $rootdir/dabg_done");
        print "dabg done\n"  if ($debug);
    }
}


########################################
## clean dasudir
#############################################

# Housekeeping for the per-dataset <dasudir>_<dataset> directories.
# Runs from $rootdir and switches back to $impute_dir afterwards.
chdir ($rootdir);


unless ($noclean) {

    print "start cleaning\n"  if ($debug);
    my @clean_arr_dasu;

    unless (-e "$rootdir/clean_dasudir_done"){

        # one cleandir job per existing directory without a "cleaned" marker
        foreach my $bimfile (@bimfli_files) {
            my $bprefix = $bimfile;
            $bprefix =~ s/.bim$//;
            my $dasudir_loc = "$dasudir"."_$bprefix";
            if (-e "$dasudir_loc") {
                push @clean_arr_dasu,  "$cleandir_script $dasudir_loc" unless (-e "$dasudir_loc/cleaned");
            }
        }


        ###################################
        ### send clean jobs
        ###################################

        if (@clean_arr_dasu > 0) {

            $sjadir = $rootdir;
            $sjaname = "clean_dasudir";
            $sjatime = 2;
            $sjamem = 1000;
            # same cap as the other cleandir job arrays (clean_pisub, clean)
            $sjamaxpar = 100;
            @sjaarray = @clean_arr_dasu;

            &send_jobarray;

        }
        else {
            # nothing left to clean: drop the marker that skips this stage
            &mysystem ("touch $rootdir/clean_dasudir_done");
            print "cleaning dasudir done\n"  if ($debug);
        }

    }
}
else {
    print "####################################################################################################\n";
    print "--noclean was switched on, please be aware that a lot of intermedieate files are kept right now\n";
    print "####################################################################################################\n";
}

chdir ($impute_dir);





############################################################
#### write chunks empty after qc
##############################################################

# Collect the names of all *.out.dosage.gz.empty files found in the qc1
# directories of every dataset into $rootdir/qc1empty_info.
# The list is written to a temporary file in the current directory first and
# only moved into place after it was closed successfully, so an incomplete
# list never becomes the final file.
unless (-e "$rootdir/qc1empty_info") {
    my $tmp_info = "qc1empty_info.tmp";
    open(my $failinf_fh, '>', $tmp_info) or die "can't write $tmp_info: $!";

    foreach my $bimfile (@bimfli_files) {
        my $bprefix = $bimfile;
        $bprefix =~ s/.bim$//;

        my $qc1_dir = "$dasudir"."qc1_$bprefix/qc1";
        opendir(my $qc1_dh, $qc1_dir) or die "can't opendir $qc1_dir: $!";
        my @qc1fall_files = grep {/\.out.dosage.gz.empty$/} readdir($qc1_dh);
        closedir($qc1_dh);

        foreach my $empty_chunk (@qc1fall_files) {
            print {$failinf_fh} "$empty_chunk\n";
        }
    }

    # buffered write errors (e.g. disk full) only surface at close
    close($failinf_fh) or die "can't close $tmp_info: $!";

    &mysystem ("mv $tmp_info $rootdir/qc1empty_info");
}




##################################################################################################################################
##################################################################################################################################
##################################################################################################################################


# Without any entry in @bimfli_files, build the list from @bim_files by
# replacing the .bim ending with ".hg19.ch.fl".
unless (@bimfli_files) {
    for my $bim (@bim_files) {
        # $bim aliases the array element, so @bim_files itself is edited
        $bim =~ s/.bim$//;
        push @bimfli_files, $bim.".hg19.ch.fl";
    }
}

# still nothing to work on: report and stop
unless (@bimfli_files) {
    print "Error: no datasets\n";
    exit;
}

#print @reffiles.": reffiles\n";
#print "debug\n";
#sleep (10);

#####################################
## combine best guess genotypes
#####################################


# Per-reference-chunk combination of best-guess genotypes in three steps:
# combine (cobg) -> prune -> merge.
# NOTE: the whole stage is wrapped in `if (0)` and therefore never executes.
# $cobg_dir is declared outside the guard because the final clean section
# further down still tests for that directory.
my $cobg_dir = "$rootdir/cobg_dir_$outname";    
if (0) {

    unless (-e $cobg_dir) {
	&mysystem ("mkdir -p $cobg_dir");
    }
    print "start cobg\n"  if ($debug);
    my @cobg_arr;     # combine jobs, one per reference chunk
    my @prune_arr;    # prune jobs for chunks that are already combined
    my @merge_arr;    # pruned chunk bfiles waiting to be merged
    unless (-e "$rootdir/cobg_done"){

	foreach my $rf (@reffiles) {
	    # cut the chunk id out of the reference file name:
	    # chr<digits>_<digits>_<digits>, falling back to chr<digits>_<digits>
	    my $refind = $rf;
	    if ($refind =~ /chr[0-9]*_[0-9]*_[0-9]*/){
		$refind =~ s/.*(chr[0-9]*_[0-9]*_[0-9]*).*/\1/;
	    }
	    else {
		$refind =~ s/.*(chr[0-9]*_[0-9]*).*/\1/;
	    }
	    #	print "$refind\n";
	    
	    my $cobg_fini = "$impute_dir/cobg.$outname.$refind.fini";

	    # only referenced by the commented-out debug print below
	    my $arrs = @cobg_arr;
	    #	print "$cobg_fini, n = $arrs\n";

	    # $in stays 1 only if every dataset has a qc2 .bim for this chunk
	    my $in = 1;
	    unless (-e $cobg_fini) {
		
		my $in_list = "";
		foreach (@bimfli_files) {
		    my $bprefix = $_;
		    $bprefix =~ s/.bim$//;
		    #		print "PREFIX: $bprefix\n";		
		    
		    my $dasudir_loc_qc1 = "$dasudir"."qc1_$bprefix";
		    my $bfile_loc = "$dasudir_loc_qc1/bgs/dos_$bprefix.$refind.out.dosage.gz.qc2";
		    my $bfile_check = "$dasudir_loc_qc1/bgs/dos_$bprefix.$refind.out.dosage.gz.qc2.bim";
		    #		my $bfile_check = "$dasudir_loc_qc1/qc1/dos_$bprefix.$refind.out.dosage.gz.empty";
		    if (-e $bfile_check) {
			$in_list .= " $bfile_loc";		
		    }
		    else {
			print " $bfile_loc has no $bfile_check\n"  if ($debug);		
			$in = 0;
			#		    exit;
		    }
		}

		my $sys_loc = "$cobg_script --targetdir $rootdir/cobg_dir_$outname --out cobg.$outname.$refind $in_list";
		#	    print "inist: $sys_loc\n";
		#	    exit;

		
		# queue the combine job only when no dataset was missing
		if ($in_list ne "") {
		    if ($in == 1){
			push @cobg_arr,$sys_loc;
		    }

		    #		if ($in == 0){
		    #		    print "$sys_loc\n";
		    #		    exit;
		    #		}
		}

	    }
	    else {
		# chunk already combined: queue pruning, or - once the prune
		# .fini marker exists - collect the pruned bfile for merging
		my $prune_fini = "$cobg_dir/prune.cobg.$outname.$refind.fini";
		my $prune_out = "$cobg_dir/prune.cobg.$outname.$refind.out";
		unless (-e $prune_fini) {
		    unless (-e $prune_out) {

			if (-e "$cobg_dir/cobg.$outname.$refind.bim") {
			    my $sys_loc = "$prune_script cobg.$outname.$refind";
			    push @prune_arr,$sys_loc;
			}

		    }
		}
		else {
		    my $bfile_loc = "prune.bfile.cobg.$outname.$refind";
		    if (-e "$cobg_dir/$bfile_loc.bim") {
			unless (-e "$cobg_dir/prune.bfile.cobg.$outname.fini") {
			    push @merge_arr, $bfile_loc;
			}
		    }
		}

	    }
	}
    }

    #print "@cobg_arr\n";
    #print "debug\n";
    #exit;









    chdir ($impute_dir);

    #my $n_cobg = @cobg_arr;
    #print "NCOBG: $n_cobg\n";
    #sleep(10);


    ###################################
    ## combine the best-guess chunks
    ###################################

    if (@cobg_arr > 0) {
	
	$sjadir = $cobg_dir;
	$sjaname = "cobg";
	$sjatime = 2;
	$sjamem = 1000;
	@sjaarray = @cobg_arr;
	
	&send_jobarray;
	
    }
    else {
	print "cobg done\n"  if ($debug);
    }



    ###################################
    ## prune best-guess chunks
    ###################################


    if (@prune_arr > 0) {
	
	$sjadir = $cobg_dir;
	$sjaname = "prune";
	$sjatime = 2;
	$sjamem = 1000;
	@sjaarray = @prune_arr;
	
	&send_jobarray;
	
    }
    else {
	print "prune_bg done\n"  if ($debug);
    }



    ###################################
    ## merge pruned best-guess chunks
    ###################################

    if (@merge_arr > 0) {


	chdir ("$cobg_dir");

	# the first pruned chunk is passed to the merge script directly,
	# all remaining chunks are listed (bed bim fam) in MERGE_list
	my $fbfile = shift(@merge_arr);
	open ML, ">", "MERGE_list" or die $!;
	foreach my $bf (@merge_arr) {
	    print ML "$bf.bed $bf.bim $bf.fam\n";
	}
	close ML;

	my @merge_job;
	push @merge_job, "$merge_script prune.bfile.cobg.$outname $fbfile MERGE_list";
	
	$sjadir = $cobg_dir;
	$sjaname = "merge";
	$sjatime = 2;
	$sjamem = 2000;
	@sjaarray = @merge_job;
	
	&send_jobarray;
	
    }
    else {
	# nothing left to merge: mark the whole cobg stage as done
	&mysystem ("touch $rootdir/cobg_done");
	print "merge pruned combined done\n"  if ($debug);
    }


}



####################################################################################################
#### combine genome wide
####################################################################################


#####################################
## combine best guess genotypes
#####################################

# Genome-wide combination of the best-guess output.
# For every dataset and each of the three qc1 subdirectories (bgn, bg, bgs)
# one $cobg_gw_script job is queued unless the matching
# <cobg_gw_dir>/<dataset>.<type>.fini marker already exists.
my $cobg_gw_dir = "$rootdir/cobg_dir_genome_wide";
&mysystem ("mkdir -p $cobg_gw_dir") unless (-e $cobg_gw_dir);

print "start cobg genome wide\n"  if ($debug);
my @cobg_gw_arr;
my $cobg_gw_fini= 0;     # number of (dataset, type) pairs already finished

if (! -e "$rootdir/cobg_gw_done") {
    foreach my $bimfile (@bimfli_files) {
        my $bprefix = $bimfile;
        $bprefix =~ s/.bim$//;

        # keep the job order per dataset: bgn, bg, bgs
        foreach my $bg_type (qw(bgn bg bgs)) {
            if (-e "$cobg_gw_dir/$bprefix.$bg_type.fini") {
                $cobg_gw_fini++;
                next;
            }
            my $chunk_dir = "$dasudir"."qc1_$bprefix/$bg_type";
            push @cobg_gw_arr, "$cobg_gw_script --targetdir $cobg_gw_dir --out $bprefix.$bg_type $chunk_dir";
        }
    }
}

###################################
## combine the best-guess chunks
###################################

if (@cobg_gw_arr) {
    @sjaarray = @cobg_gw_arr;
    $sjaname  = "cobg_gw";
    $sjadir   = $impute_dir;
    $sjatime  = 2;
    $sjamem   = 12000;

    &send_jobarray;
}
else {
    # nothing queued (or stage already marked done): (re)write the marker
    &mysystem ("touch $rootdir/cobg_gw_done");
    print "cobg done\n"  if ($debug);
}







###################################
## start pcaer with cobg
###################################
# Prepare $pcaer_dir: link the genome-wide .bgs bfiles of every dataset into
# it and write the pcaer command line to README.pcaer.
# The command is only echoed into the README here, it is not executed.
my $cobg_out = "$pcaer_dir/README.pcaer";
if (! -e $cobg_out) {

    chdir ($pcaer_dir);

    my @bgs_prefixes;
    foreach my $bimfile (@bimfli_files) {
        my $bprefix = $bimfile;
        $bprefix =~ s/.bim$//;

        # symlinks are created in the current directory
        foreach my $ext (qw(bim bed fam)) {
            &mysystem ("ln -fs  $cobg_gw_dir/$bprefix.bgs.$ext");
        }
        push @bgs_prefixes, "$bprefix.bgs";
    }

    # every prefix is preceded by a single blank
    my $blist = join ("", map { " $_" } @bgs_prefixes);

    my $pcaer_sys = "pcaer --prefercase --preferfam --out cobg_gw.$outname $blist";
    &mysystem ("echo $pcaer_sys > README.pcaer");

    chdir ($rootdir);
}





#print "debug\n";
#sleep(10);





# Final housekeeping: the qc1f/bg/bgs/bgn subdirectories of every
# <dasudir>qc1_<dataset> directory plus $cobg_dir and its errandout directory.
# Runs from $rootdir.
chdir ($rootdir);

if ($noclean) {
    print "####################################################################################################\n";
    print "--noclean was switched on, please be aware that a lot of intermedieate files are kept right now\n";
    print "####################################################################################################\n";
}
else {

    print "start cleaning\n"  if ($debug);
    my @clean_arr;

    if (! -e "$rootdir/clean_done") {

        # [ subdirectory, option string put in front of the directory ]
        my @qc1_subdirs = (
            [ "qc1f", ""        ],
            [ "bg",   "--full " ],
            [ "bgs",  "--full " ],
            [ "bgn",  "--full " ],
        );

        foreach my $bimfile (@bimfli_files) {
            my $bprefix = $bimfile;
            $bprefix =~ s/.bim$//;

            my $qc1_root = "$dasudir"."qc1_$bprefix";

            foreach my $entry (@qc1_subdirs) {
                my ($subdir, $option) = @$entry;
                my $target_dir = "$qc1_root/$subdir";

                # skip missing directories and those already marked "cleaned"
                next unless (-e $target_dir);
                next if (-e "$target_dir/cleaned");
                push @clean_arr, "$cleandir_script $option$target_dir";
            }
        }

        if (-e "$cobg_dir") {
            my $cobg_eo_loc = "$cobg_dir/errandout";
            if ((-e $cobg_eo_loc) && !(-e "$cobg_eo_loc/cleaned")) {
                push @clean_arr, "$cleanerrandout_script $cobg_eo_loc";
            }
            if (! -e "$cobg_dir/cleaned") {
                push @clean_arr, "$cleandir_script --cobg $cobg_dir";
            }
        }

        ###################################
        ### send clean jobs
        ###################################

        if (@clean_arr) {
            @sjaarray  = @clean_arr;
            $sjaname   = "clean";
            $sjadir    = $rootdir;
            $sjatime   = 2;
            $sjamem    = 1000;
            $sjamaxpar = 100;

            &send_jobarray;
        }
        else {
            # nothing left to clean: drop the marker that skips this stage
            &mysystem ("touch $rootdir/clean_done");
            print "cleaning done\n"  if ($debug);
        }
    }
}






########################################################
## du at end
#################################################



# Queue $du_script once, as long as there is no du.fini in $rootdir.
chdir ($rootdir);

print "start du\n"  if ($debug);
my @du_arr;

if (! -e "du.fini") {
    push @du_arr, "$du_script";
}

###################################
### send du job
###################################

if (@du_arr) {
    @sjaarray = @du_arr;
    $sjaname  = "du";
    $sjadir   = $rootdir;
    $sjatime  = 2;
    $sjamem   = 1000;

    &send_jobarray;
}






###############################################
### mv the blueprint files to a safe place
###############################################
# Move every blueprint_jobs__* entry of @files into the backup directory.
# An existing backup of the same name is never overwritten: ".c" is appended
# to the target name until it is unused.
my $blueprint_sich_dir = "blueprint_bak";

if (! -e $blueprint_sich_dir) {
    print "$blueprint_sich_dir is not existing, create one for you\n"  if ($debug);
    mkpath($blueprint_sich_dir, {verbose => 0, mode => 0750});
}

foreach my $floc (grep { /^blueprint_jobs__/ } @files) {
    print "mv $floc\n"  if ($debug);

    my $target = $floc;
    $target = $target.".c" while (-e "$blueprint_sich_dir/$target");

    &mysystem ("mv $floc $blueprint_sich_dir/$target");
}


#################################################################
## print meta file
#################################################################

# Append a timestamped record of the options in use to $outname.meta
# (current directory), one "name<TAB>value" line per setting.
# The bare block only limits the scope of the helper variables.
{
    my $now = localtime time;
    $now =~ s/ /_/g;        # no blanks inside the timestamp

    my $meta_file = "$outname.meta";
    open(my $meta_fh, '>>', $meta_file) or die "$!: $meta_file";

    # optional input files are reported as "NA" when not set
    my $refiexfile_meta  = $refiex_file  ? $refiex_file  : "NA";
    my $triosetfile_meta = $trioset_file ? $trioset_file : "NA";

    my @meta_rows = (
        [ "----------------------",  "-----($now)----------" ],
        [ "variable(see_also_help)", "value" ],
        [ "reference_directory",     "$refdir" ],
        [ "reference_snp_info",      "$refdir/$suminfo_s.chrXXX.gz" ],
        [ "reference_batch_info",    "$refdir/$suminfo_n" ],
        [ "popname",                 "$popname" ],
        [ "sfh",                     "$sec_freq" ],
        [ "fth",                     "$fth_th" ],
        [ "info_th",                 "$info_th" ],
        [ "freq_th",                 "$freq_th" ],
        [ "bg_th",                   "$bg_th" ],
        [ "bg_miss_th(hardcoded)",   "0.02" ],
        [ "bgs_maf_th(hardcoded)",   "0.05" ],
        [ "bgs_miss_th(hardcoded)",  "0.01" ],
        [ "bgn_th(hardcoded)",       "no_filter_on_maf_and_miss_compared_to_dosage" ],
        [ "spliha_n",                "$spliha_n" ],
        [ "refiex_file",             "$refiexfile_meta" ],
        [ "trioset_file",            "$triosetfile_meta" ],
        [ "plink",                   "$ploc" ],
        [ "impute2",                 "$i2loc" ],
        [ "liftover",                "$liloc" ],
        [ "logfiles",                "$loloc" ],
    );

    foreach my $row (@meta_rows) {
        print {$meta_fh} "$row->[0]\t$row->[1]\n";
    }

    # buffered write errors only surface at close
    close($meta_fh) or die "$!: $meta_file";
}





###############################################
## JOBPERFORMER, right now only on Broad, has to go behind CLEAN
###################################################

# Only for $qloc "qsub_b": queue $performance_script once, as long as its
# README (perf.<outname>.README below $impute_dir) does not exist.
if ($qloc eq "qsub_b") {

    my @performer_arr;
    my $performer_fini = "$impute_dir/perf.$outname.README";

    if (! -e $performer_fini) {
        push @performer_arr, "$performance_script --out perf.$outname $rootdir/$impute_dir $rootdir";
    }

    if (@performer_arr) {
        @sjaarray = @performer_arr;
        $sjaname  = "performer";
        $sjadir   = $impute_dir;
        $sjatime  = 2;
        $sjamem   = 2000;

        &send_jobarray;
    }
    else {
        print "performer done\n"  if ($debug);
    }
}





#print "@merge_arr\n";
#print "debug\n";
#exit;
#############################################################
## SUCCESSSS
#############################################################

# Last step: hand a job array named "finished" to send_jobarray.
# A single placeholder entry ("tmp") is appended to @sjaarray for it.
push @sjaarray, "tmp";
$sjaname = "finished";
$sjadir  = $rootdir;
$sjamem  = 1000;
$sjatime = 2;

&send_jobarray;
