#!/usr/bin/env perl
use strict;

#############################
# load utility functions
#############################

use FindBin;
use lib "$FindBin::Bin";
use Ricopili::Utils qw(trans);

use lib $ENV{rp_perlpackages};
use Compress::Zlib ;


my $out_name = "metaber";

my $info_th = "0.1";

#############################
# read config file
#############################

#my $ploc = &trans("ploc");
my $sloc = &trans("sloc");
my $meloc = &trans("meloc");
my $rloc = &trans("rloc");

#################################################


my $r_sys = "$rloc";
$r_sys =~ s/_SPACE_/ /g;

system("$r_sys RHOME > /dev/null");
my $status = ($? >> 8);
if ($status != 0) {
    print "I could not start R, maybe you are using an old ricopili configuration?\ne.g. rloc does not need a path but a full starting command.\n";
    exit;
}



use Getopt::Long;
GetOptions( 
   "out=s"=> \$out_name,
   "info_th=s"=> \$info_th,
#   "beta"=> \my $beta,

   "help"=> \my $help,
    "nofilter"=> \my $nofilter,
    "no_neff_filter"=> \my $no_neff_filter,
    "detout"=> \my $detout,

   "xdf"=> \my $xdf,
    "debug"=> \my $debug,
    
 );

if ($help || $out_name eq "metaber"){
    print "usage: $0 danerfile1 danerfile2 ....

      options:

	--help             print this message and exit
        --info_th STRING   threshold for info-score to get into met-analysis
                             defaut $info_th
                             !!! use zero before period  !!!
        --nofilter         no filter at all
        --no_neff_filter   no neff filter 

#        --beta             if there is BETA instead of OR present

         --detout          also include a detailed out-file with all P, OR, SE

        --xdf              do a xdf-P instead of weighted meta
        --out STRING       outname

        --debug            extended output

 works with files coming out of daner (incl. ngt)
 uses metal

 --out is mandatory

 created by Stephan Ripke 2010 at MGH, Boston, MA
 in the frame of the PGC
\n";
    exit 2;
}

#my $out_name = "metaber_".$out_name;


###################################################
###  system call with test if successfull
###################################################

sub mysystem(){
    my ($systemstr)="@_";
    system($systemstr);
    my $status = ($? >> 8);
    die "$systemstr\n->system call failed: $status" if ($status != 0);
}


#####################################
# subroutine to count lines of a file
#####################################

sub count_lines {
    my ($file)=@_;
    my $lc=0;
    die "$file: ".$! unless open FILE, "< $file";
    while (<FILE>){
	$lc++;
    }
    close FILE;
    $lc;
}


##########################################
# subroutine to split a plink-output-line
##########################################

sub split_line {
    my ($line)=@_;
    chomp($line);
    $line =~ s/^[\s]+//g;
    my @cols=  split /\s+/, $line;
}

##########################################
# subroutine to split a plink-output-line with references
##########################################

sub split_line_ref {
    my ($line)=${$_[0]};
    chomp($line);
    $line =~ s/^[\s]+//g;
    my @cols=  split /\s+/, $line;
    \@cols;
}




#####################################
# print array to file
####################################

sub a2file {
    my ($file, @lines)=@_;
    die "$!: $file" unless open FILE, "> $file";
    foreach (@lines){
	print FILE $_;
    }
    close FILE;
}




use File::Path;
use File::Copy;
use Cwd;

my $min_OR = 0.0001;
my $max_OR = 10000;
my $min_BETA = -4;
my $max_BETA = 4;




my $out_short = $out_name;
$out_short =~ s!.*/!!;
#print "$out_name\n";
#print "$out_short\n";

#exit;


my $scratchdir = $sloc;
my $workdir="$scratchdir/dameta_$out_short";



while (-e $workdir) {
    $workdir .= ".d";
}
print "workdir: $workdir\n" if ($debug);

sleep(4);
my @created_work = mkpath(   ## $created ?
			"$workdir",
			{verbose => 0, mode => 0750},
    );


#####################################
### BEGIN
#####################################

my @forest_files=();

#####################################
### perform meta, so we see, which SNPs come out of it.
#####################################


#print "$ploc/plink --out $out_name.comb --meta-analysis @ARGV\n";
#exit;
my $metal_script_name = "$out_name.metal";
my $metal_script = "SCHEME STDERR\n";

if (@ARGV == 1) {
    my $file = $ARGV[0];
    if ($file =~ /.gz$/) {
	&mysystem ("cp $file $out_name.metadaner.gz");
    }
    else {
	&mysystem ("gzip -c $file > $file.gz");
	&mysystem ("cp $file.gz $out_name.metadaner.gz");
    }
    exit;
}


my $or_txt = "";
my $die = 0;

foreach my $file (@ARGV) {


    my $dfile_short = $file;
    $dfile_short =~ s!.*/!!;
    print  "check header of: $dfile_short\n" if ($debug);

    $or_txt = "";
    
    my $df = gzopen("$file", "rb")  or die "Cannot open $file: $gzerrno\n" ;
    $df->gzreadline(my $line);
    my @cells = @{&split_line_ref(\$line)};


    my $sollc = @cells;
    my $ld_sw = 0;


    foreach (@cells) {
	### check for OR column, but also for the first file
	if ($_ eq "BETA") {
	    die "OR and BETA in header, or file-inconsistency: $file" if ($or_txt eq "log(OR)");
	    $or_txt = "BETA";
	}
	if ($_ eq "OR") {
	    die "OR and BETA in header, or file-inconsistency: $file" if ($or_txt eq "BETA");
	    $or_txt = "log(OR)";
	}
	if ($_ eq "R2-LD") {
	    $ld_sw = 1;
	}
    }
    die "no OR or BETA in header\n@cells\n" if ($or_txt eq "");

    my $cc = 0;
    my $line = "ha";
    #    print "new gfil\n";


    if (0) {    
	while ($df->gzreadline(my $line_loc)){
	    $cc++;
	    chomp($line_loc);
	    unless ($line_loc eq "") {
		$line = $line_loc;
	    }
	}


    my @cells = @{&split_line_ref(\$line)};


	if ($sollc != @cells && $line ne "ha") {
	my $cc_loc = @cells;
	print "Warning: column count problem in $file, $sollc, $cc_loc,\n@cells\n" ;
	$die = 1;
	
	}
    }


    
    $df -> gzclose();

    my $filtertxt = "ADDFILTER INFO > 0$info_th";
    if ($ld_sw == 1){
	$filtertxt = "ADDFILTER R2-LD > 0$info_th";
    }
    if ($nofilter) {
	$filtertxt = "";
    }

    my $process_templ ="
MARKER   SNP
WEIGHT   N
ALLELE   A1 A2
FREQ     FRQ_A_218
EFFECT   $or_txt
STDERR   SE
PVAL     P
$filtertxt

PROCESS $workdir/$dfile_short.clean.gz
";
    
    $metal_script .= $process_templ;
    
}

#if ($die == 1){
#    die "column count problem\n" ;
#}


#exit;

#ANALYZE HETEROGENEITY

#print "$metal_script\n";








#####################################
### read all single files for frequences, etc. 
#####################################




#&mysystem ("$ploc/plink --out $out_name.comb --meta-analysis @ARGV");
#exit;
my $nca = 0;
my $nco = 0;

my %fca ;
my %nca ;
my %fco ;
my %nco ;
my %neff ;
my %ngt ;
my %nmet ;
my %info ;
my %a1 ;
my %a2 ;
my %chr ;
my %pos ;
my %p_str ;
my %det_str ;
my %snpex ;



# CHR         SNP          BP  A1  A2   FRQ_A_218   FRQ_U_323    INFO      OR      SE       P   ngt

my $fcount = 0;
my %col = ();



my $neff_max = 0;

foreach my $dfile (@ARGV) {

    my $df = gzopen("$dfile", "rb")  or die "Cannot open $dfile: $gzerrno\n" ;
    $df->gzreadline(my $line);
    my @cells = @{&split_line_ref(\$line)};

    my $neff_max_per_cohort = 0;

#    print "read $dfile\n";

    my $nca_loc;
    my $nco_loc;

    my $ndet_switch = 0; ## switched on if Nca, Nco, Neff exists;
 #   my $nf_loc;


# CHR         SNP          BP  A1  A2     FRQ    INFO      OR      SE       P
# CHR         SNP          BP  A1  A2   FRQ_A_276   FRQ_U_521    INFO      OR      SE       P   ngt
    %col = ();
    my $cc =0;
    foreach (@cells) {
	$col{$_} = $cc;
	$cc++;
    }

#    my $chr_col = 0;
#    my $snp_col = 1;
#    my $pos_col = 2;
#    my $a1_col = 3;
#    my $a2_col = 4;
#    my $fa_col = 5;
#    my $fu_col = 6;
#    my $info_col = 7;
#    my $or_col = 8;
#    my $se_col = 9;
#    my $p_col = 10;
#    my $ngt_col = 11;


#FRQ_Cases FRQ_Controls

#    if ($dfile =~ /MDD29.0515a_mds6b/) {
#	print "@cells\n";
#	sleep (10);
#    }
    
    my $cc =0;
    foreach (@cells) {

	if ($_ =~ /FRQ_Cases_/) {
	    $nca_loc = $_;
	    $nca_loc =~ s/FRQ_Cases_//;
	    $col{"FRQA"} = $cc;
	}
	if ($_ =~ /FRQ_Controls_/) {
	    $nco_loc = $_;
	    $nco_loc =~ s/FRQ_Controls_//;
	    $col{"FRQU"} = $cc;
	}

	if ($_ =~ /FRQ_A_/) {
	    $nca_loc = $_;
	    $nca_loc =~ s/FRQ_A_//;
	    $col{"FRQA"} = $cc;
	}
	if ($_ =~ /FRQ_U_/) {
	    $nco_loc = $_;
	    $nco_loc =~ s/FRQ_U_//;
	    $col{"FRQU"} = $cc;
	}

	if ($_ =~ /MAF/) {
	    $col{"FRQ"} = $cc;
	}

	if ($_ =~ /Neff_half/) {
	    $col{"Neff"} = $cc;
	    $ndet_switch = 1;
	}
	if ($_ =~ /Nca/) {
	    $col{"Nca"} = $cc;
	}
	if ($_ =~ /Nco/) {
	    $col{"Nco"} = $cc;
	}
	

#	if ($_ eq "FRQ") {
#	    $nf_loc = $_;
#	    $nco_loc =~ s/FRQ_U_//;
#	}
	$cc++;
    }

    unless (exists $col{"FRQA"}){
	print "$dfile; no FRQ_A, so I take FRQ\n" if ($debug);


	if (exists $col{"FRQ"}){
	    print "found FRQ\n"  if ($debug);
	    $col{"FRQA"} = $col{"FRQ"};
	}
	elsif (exists $col{"FRQU"}) {	    
	    print "found FRQU\n"  if ($debug);
	    $col{"FRQA"} = $col{"FRQU"};
	}
	else {
	    die "no FRQ column" ;
	}


    }
    unless (exists $col{"FRQU"}){
	print "$dfile: no FRQ_U, so I take FRQ\n";
	die "no FRQ column" unless (exists $col{"FRQ"});
	$col{"FRQU"} = $col{"FRQ"};
    }

    unless (exists $col{"FRQU"}){
	print "no frequency (unaff.) column in $dfile\n";
	die;
    }

    unless (exists $col{"FRQA"}){
	print "no frequency (aff.) column in $dfile\n";
	die;
    }

    if ($ndet_switch == 1) {
	unless (exists $col{"Nca"}){
	    print "no Nca (but Neff) column in $dfile\n";
	    die;
	}
	unless (exists $col{"Nco"}){
	    print "no Nco (but Neff) column in $dfile\n";
	    die;
	}
	print "Nca for $dfile: ".$col{"Nca"}."\n" if ($debug);
	print "Nco for $dfile: ".$col{"Nco"}."\n" if ($debug);
	print "Neff for $dfile: ".$col{"Neff"}."\n" if ($debug);
	sleep (3) if ($debug);	
    }
    else {
	print "no Nca, Nco, Neff in $dfile\n" if ($debug);
    }
    


    my $no_fca = 0;
    if ($nca_loc == 0){
	$nca_loc = 100;
	$no_fca = 1;
    }
    $nco_loc = 100 if ($nco_loc == 0);


    $nca += $nca_loc;
    $nco += $nco_loc;


    my $ngt_sw = 1;
    $ngt_sw = 0 unless (exists $col{"ngt"});

    my $beta = 0;
    if (exists $col{"BETA"}) {
	$beta = 1;
    }

    my $nwarnings=0;
    while ($df->gzreadline(my $line)){
	chomp($line);


	my @cells = @{&split_line_ref(\$line)};

#	print $cells[1]."\n";
	## read basic information
#	if ($fc == 1) {
	my $flip = 0;

	#### debug one SNP
#	if ($cells[$col{"SNP"}] eq "chr19_1524992_D" ){
#	    print "yes, hit: $dfile\n";
#	    print $cells[$col{"SNP"}];
#	    print "\t".$cells[$col{"A1"}];
#	    print "\t".$cells[$col{"A2"}];
#	    print "\n";
#	}


	unless (exists $chr{$cells[$col{"SNP"}]}){


#	    if ($cells[$col{"SNP"}] eq "chr19_1524992_D" ){
#		print "not yet existing\n";
#	    }

	    
	    $a1{$cells[$col{"SNP"}]} = $cells[$col{"A1"}];
	    $a2{$cells[$col{"SNP"}]} = $cells[$col{"A2"}];
	    $chr{$cells[$col{"SNP"}]} = $cells[$col{"CHR"}];
	    if (exists $col{"POS"}) {
		$pos{$cells[$col{"SNP"}]} = $cells[$col{"POS"}];
	    }
	    else {
		$pos{$cells[$col{"SNP"}]} = $cells[$col{"BP"}];
	    }

	}
	else {
#	    if ($cells[$col{"SNP"}] eq "chr19_1524992_D" ){
#		print "already existing\n";
#	    }



	    
	    if ($cells[$col{"A1"}] ne $a1{$cells[$col{"SNP"}]} || $cells[$col{"A2"}] ne $a2{$cells[$col{"SNP"}]}){

		if ($cells[$col{"A1"}] eq $a2{$cells[$col{"SNP"}]} && $cells[$col{"A2"}] eq $a1{$cells[$col{"SNP"}]}) {
		    $flip = 1;
		}
		else {

		    

#		    if ($cells[$col{"SNP"}] eq "chr19_1524992_D") {
#			print "exclude: ".$cells[$col{"SNP"}]."\t";
#			print $cells[$col{"A1"}]."\t";
#			print $cells[$col{"A2"}]."\t";
#			print $a1{$cells[$col{"SNP"}]}."\t";
#			print $a2{$cells[$col{"SNP"}]}."\n";
#		    }
		    
		    $snpex{$cells[$col{"SNP"}]} = 1;

#		    $metal_script .= "ADDFILTER MARKER_ID IN (".$cells[$col{"SNP"}].")\n";
#		    print "error: ".$cells[$col{"A1"}]."\n";
#		    print "error: ".$cells[$col{"A2"}]."\n";
#		    print "error: ".$a1{$cells[$col{"SNP"}]}."\n";
		    #		    print "error: ".$a2{$cells[$col{"SNP"}]}."\n";
		    
#		    exit;
		    }
	    }
	}
#	}

	my $info_loc = $cells[$col{"INFO"}];

#	print $info_loc."\t";
#	print $info_th."\n";
	if ($info_loc < $info_th){
#	    $p_str{$cells[$col{"SNP"}]} .= "\tNA";

	    if ($detout) {
		$det_str{$cells[$col{"SNP"}]} .= "\t-";
		$det_str{$cells[$col{"SNP"}]} .= "\t-";
		$det_str{$cells[$col{"SNP"}]} .= "\t-";
	    }
	    next ;
	}

	my $tmp = $cells[$col{"FRQA"}];
#	print "freq: $tmp\n";

	my $fca_loc = $cells[$col{"FRQA"}];
	my $fco_loc = $cells[$col{"FRQU"}];
#	print $cells[$col{"SNP"}].": $fca_loc\t$fco_loc\n";
	if ($no_fca){
	    $fca_loc = $cells[$col{"FRQU"}] * $cells[$col{"OR"}];
	}
#	print $cells[$col{"SNP"}].": $fca_loc\t$fco_loc\n\n";
	if ($flip == 1){
	    $fca_loc = 1 - $fca_loc;
	    $fco_loc = 1 - $fco_loc;
#	    print "FLIP\n";
	}


	
	my $neff_loc = 0;

	if ($ndet_switch == 1) {
	    $nca_loc = $cells[$col{"Nca"}];
	    $nco_loc = $cells[$col{"Nco"}];
	    $neff_loc = $cells[$col{"Neff"}];
	}
	else {
	    if ($nca_loc + $nco_loc > 0) {
		$neff_loc = (4*$nca_loc*$nco_loc/($nca_loc+$nco_loc))/2;
	    }
	}



	if ($cells[$col{"P"}] == "NA"){
		if ($nwarnings < 10) {
		    print "Warning: invalid P, exclude this SNP from Nca, Nco, Neff: ".$cells[$col{"SNP"}]."\n" if ($debug);
		}
		if ($nwarnings == 10) {
		    print "more than 10 warnings, not printing out more....\n" if ($debug);
		}
		$nwarnings++;
		
		next;
	    }


#	if ($cells[$col{"OR"}] eq "NA"){
	#	if ($cells[$col{"OR"}]*1 !=  $cells[$col{"OR"}] || $cells[$col{"OR"}] <= 0){
	if ($beta) {

	    if ($cells[$col{"BETA"}] < $min_BETA || $cells[$col{"BETA"}] > $max_BETA){
		if ($nwarnings < 10) {
		    print "Warning: invalid BETA, exclude this SNP from Nca, Nco, Neff: ".$cells[$col{"SNP"}]."\n" if ($debug);
		}
		if ($nwarnings == 10) {
		    print "more than 10 warnings, not printing out more....\n" if ($debug);
		}
		$nwarnings++;
		
		next;
	    }
	    
	}
	else {
	    if ($cells[$col{"OR"}] < $min_OR || $cells[$col{"OR"}] > $max_OR){
		if ($nwarnings < 10) {
		    print "Warning: invalid OR, exclude this SNP from Nca, Nco, Neff: ".$cells[$col{"SNP"}]."\n" if ($debug);
		}
		if ($nwarnings == 10) {
		    print "more than 10 warnings, not printing out more....\n" if ($debug);
		}
		$nwarnings++;
		
		next;
	    }
	}



	if ($neff_loc > $neff_max_per_cohort) {
	    $neff_max_per_cohort = $neff_loc;
	}
	
	$nca{$cells[$col{"SNP"}]} += $nca_loc;
	$nco{$cells[$col{"SNP"}]} += $nco_loc;
	$neff{$cells[$col{"SNP"}]} += $neff_loc;
	$fca{$cells[$col{"SNP"}]} += $fca_loc * $nca_loc;
	$fco{$cells[$col{"SNP"}]} += $fco_loc * $nco_loc;
	$info{$cells[$col{"SNP"}]} += $cells[$col{"INFO"}] * $nco_loc; ## weight by N
	if ($ngt_sw ==1) {
	    $ngt{$cells[$col{"SNP"}]} += $cells[$col{"ngt"}] ;
	}
	$nmet{$cells[$col{"SNP"}]}++;

	$p_str{$cells[$col{"SNP"}]} .= "\t".$cells[$col{"P"}];

	if ($detout) {
	    $det_str{$cells[$col{"SNP"}]} .= "\t".$cells[$col{"P"}];
	    $det_str{$cells[$col{"SNP"}]} .= "\t".$cells[$col{"OR"}];
	    $det_str{$cells[$col{"SNP"}]} .= "\t".$cells[$col{"SE"}];
	}

    }

    $df -> gzclose();
    $fcount++;
    $neff_max += $neff_max_per_cohort;
}

print "$nca\n" if ($debug);
print "$nco\n" if ($debug);
print "$metal_script_name\n" if ($debug);

print "neff_max: $neff_max\n" if ($debug);
#sleep(3);



###################
    ## clean files
    ####

my $die = 0;

foreach my $dfile (@ARGV) {


    my $dfile_short = $dfile;
    $dfile_short =~ s!.*/!!;
#    print  "$dfile\n";
#    print  "$dfile_short\n";
#    exit;

    my $df = gzopen("$dfile", "rb")  or die "Cannot open $dfile: $gzerrno\n" ;
    my $dfo = gzopen("$workdir/$dfile_short.clean.gz", "wb")  or die "Cannot open $workdir/$dfile_short.clean.gz: $gzerrno\n" ;
    $df->gzreadline(my $line);

    my $beta = 0;
    my @cells = @{&split_line_ref(\$line)};
    if ($cells[8] eq "BETA"){#
	$beta = 1;
    }
    
    
    $dfo->gzwrite($line);
    
    
    while ($df->gzreadline(my $line)){
	chomp($line);
	my @cells = @{&split_line_ref(\$line)};


	if (@cells < 11) {	
	    print "Error: ".$cells[$col{"SNP"}]." does not have enough cells in $dfile\n" ;
	    $die = 1;
	}

	## remove OR lower than 1.0e-04 or bigger than 1.0e04)
	if ($beta){
	    if ($cells[8] < $min_BETA || $cells[8] > $max_BETA) {
		next;
	    }
	}
	else {
	    if ($cells[8] < $min_OR || $cells[8] > $max_OR) {
		next;
	    }
	}
	
	if (exists $snpex{$cells[$col{"SNP"}]}) {
	    next;
	}
	$dfo->gzwrite($line."\n");
	
    }
    
    $df -> gzclose();
    $dfo -> gzclose();
    
    print "$dfile cleaned\n" if ($debug);
#    print "$dfile.clean.gz\n";
#    exit;
}


if ($die == 1){
    die "column count problem\n" ;
}



### do the meta analysis.


$metal_script .= "OUTFILE $out_name.meta .tbl\n";
$metal_script .= "ANALYZE HETEROGENEITY";
&a2file ($metal_script_name,$metal_script);
&mysystem ("$meloc/metal < $metal_script_name > $out_name.log");



#print "debug\n";
#print "debug\n";
#exit;

    ###################
    ## remove cleaned file
    ####

#foreach my $dfile (@ARGV) {
    
#    print "remove $dfile.clean.gz\n" ;
#    &mysystem ("rm $dfile.clean.gz") ;
    
#}


## print out SNPex list

die $! unless open FILE1, "> $out_name.meta.snpex";
foreach (keys %snpex) {
    print FILE1 "$_\t";
    print FILE1 $a1{$_}."\t";
    print FILE1 $a2{$_}."\n";

}
close FILE1;



################################################
#############################################
### DETOUT
################################################
###############################################

my %pxdf;

################################################
### write p-values for xdf test
###############################################

if ($xdf) {
    die "$out_name.detout: $!" unless open PSTR, "> $out_name.detout"; 
    foreach (keys %p_str) {
	
#    print PSTR $_.$p_str{$_};
	print PSTR $p_str{$_};
	if ($nmet{$_} != $fcount) {
	    foreach my $cloc (1..($fcount - $nmet{$_})){
		print PSTR "\tNA";
	    }
	}
	print PSTR "\n";
    }
    
    close PSTR;
    
    my $nxdf = keys %p_str;
}
    
################################################
#############################################
### XDF
################################################
###############################################

my %pxdf;

################################################
### write p-values for xdf test
###############################################

if ($xdf) {
    die "$out_name.p_str: $!" unless open PSTR, "> $out_name.p_str"; 
    foreach (keys %p_str) {
	
#    print PSTR $_.$p_str{$_};
	print PSTR $p_str{$_};
	if ($nmet{$_} != $fcount) {
	    foreach my $cloc (1..($fcount - $nmet{$_})){
		print PSTR "\tNA";
	    }
	}
	print PSTR "\n";
    }
    
    close PSTR;
    
    my $nxdf = keys %p_str;
    
    
################################################
### perform xdf test in R
###############################################
    
    my $R_templ = '

matrix(scan("INNAME"),NSNPS,NSTUDIES,byrow=T)-> p5m

xdf <- function(x){ 
       1-pchisq (sum(qchisq(1-x,1),na.rm=T),length(na.omit(x))) 
   }

apply(p5m, 1, xdf)-> xdf_result
write.table(xdf_result,"OUTNAME",col.names=F,row.names=F)
';
    
    
    $R_templ =~ s/INNAME/$out_name.p_str/g ;
    $R_templ =~ s/OUTNAME/$out_name.xdf/g ;
    $R_templ =~ s/NSTUDIES/$fcount/g ;
    $R_templ =~ s/NSNPS/$nxdf/g ;
    
    
    &a2file ( "R_$out_name.xdf" , $R_templ);
    &mysystem("$r_sys < R_$out_name.xdf --vanilla ");
    
    
    
    
    
################################################
### XDF out
###############################################
    
    
    die "$out_name.xdf: $!" unless open XDF, "< $out_name.xdf"; 
    
    foreach (keys %p_str) {
	my $line = <XDF>;
	chomp($line);
	$pxdf{$_} = $line; 
    }
    close XDF;
    
}

#exit;

################################################
### read combined meta, write new daner_file
###############################################

#MarkerName  Allele1  Allele2  Effect   StdErr   P-value  Direction  HetISq  HetChiSq  HetDf  HetPVal

die "$out_name.meta: $!" unless open META, "< $out_name.meta1.tbl"; 
die "$out_name.metadaner.tmp: $!" unless open DANER, "> $out_name.metadaner.tmp"; 
my $header = <META>;
#print DANER "CHR\tSNP\tBP\tA1\tA2\tFRQ_A_$nca\tFRQ_U_$nco\tINFO\tOR\tSE\tP\tngt\tDirection\tHetISqt\tHetChiSq\tHetDf\tHetPVa";
print DANER "CHR\tSNP\tBP\tA1\tA2\tFRQ_A_$nca\tFRQ_U_$nco\tINFO\tOR\tSE\tP\tngt\tDirection\tHetISqt\tHetDf\tHetPVa\tNca\tNco\tNeff_half";

if ($detout) {
    foreach my $dfile (@ARGV) {
	print DANER "\tP-$dfile\tOR-$dfile\tSE-$dfile";
    }
}
print DANER "\n";

#print DANER "CHR\tSNP\tBP\tA1\tA2\tFRQ_A_$nca\tFRQ_U_$nco\tINFO\tOR\tSE\tP\tngt\tDirection\tHetISqt\tHetChiSq\tHetDf\tHetPVa\txdfPVa\n";



my $neff_max_half = $neff_max/2;
#if ($nca>0 || $nco > 0){
#    $neff_max_half = (4*$nca*$nco/($nca+$nco))/4;;
#}
print "neff_max_half: $neff_max_half\n" if ($debug);
#sleep(3);


my $neff_ex_n =0;

while (<META>){
    chomp;
    my @cells = @{&split_line_ref(\$_)};


#    my $snp_name = $cells[2];
    my $snp_name = $cells[0];
    my $a1_name = $cells[1];
    my $a2_name = $cells[2];
    my $effect = $cells[3];


    unless ($a1_name =~ m/\+[0-9]+a/ || $a1_name =~ m/\+[0-9]+b/) {    
	$a1_name = uc($a1_name);
    }
    else {
	my @st = split '\+', $a1_name;
	$st[0] = uc($st[0]);
	$a1_name = $st[0]."+".$st[1];
    }
    unless ($a2_name =~ m/\+[0-9]+a/ || $a2_name =~ m/\+[0-9]+b/) {    
	$a2_name = uc($a2_name);
    }
    else {
	my @st = split '\+', $a2_name;
	$st[0] = uc($st[0]);
	$a2_name = $st[0]."+".$st[1];
    }
#    $a2_name = uc($a2_name);


    if ($a1_name ne $a1{$snp_name} || $a2_name ne $a2{$snp_name}){
	if ($a1_name eq $a2{$snp_name} && $a2_name eq $a1{$snp_name}) {
	    $effect = $effect * (-1);
	}
	else {
	    print "allelenames at $snp_name changed, $a1_name, $a1{$snp_name}, $a2_name, $a2{$snp_name} -> ERROR\n";
	    die;
	}
    }

#    if ($cells[12] <= 0 || $cells[12] > 1) {
#	die "unvalid p $cells[12] at $snp_name";
 #   }

    die "$snp_name" unless (exists $chr{$snp_name});

    my $out_row = "";

    $out_row .= $chr{$snp_name};
    $out_row .= "\t".$snp_name;
    $out_row .= "\t".$pos{$snp_name};
    $out_row .= "\t".$a1{$snp_name};
    $out_row .= "\t".$a2{$snp_name};

#    printf DANER "\t%.3g",$fca{$snp_name}/$nca;
#    printf DANER "\t%.3g",$fco{$snp_name}/$nco;
#    printf DANER "\t%.3g",$info{$snp_name}/$nco;
    if ($nca{$snp_name} == 0) {
	print "***warning: no meta case-number for $snp_name\n" if ($debug);
	$nca{$snp_name} = 1;
	$fca{$snp_name} = 0;
    }
    if ($nco{$snp_name} == 0) {
	print "***warning: no meta control-number for $snp_name\n" if ($debug);
	$nco{$snp_name} = 1;
	$fco{$snp_name} = 0;
	$info{$snp_name} = 0;
    }

#    die "$nca ($snp_name); fca: $fca{$snp_name}, fco $fca{$snp_name}" if ($nca{$snp_name} == 0);
#    die "$nco ($snp_name)" if ($nco{$snp_name} == 0);

    $out_row .= sprintf  "\t%.3g",$fca{$snp_name}/$nca{$snp_name};
#    print "fca:".$fca{$snp_name};
#    print "\tnca:".$nca{$snp_name};
#    print "\n";
    $out_row .= sprintf  "\t%.3g",$fco{$snp_name}/$nco{$snp_name};
    $out_row .= sprintf  "\t%.3g",$info{$snp_name}/$nco{$snp_name};


    $out_row .= sprintf  "\t%.5f",exp($effect);
    $out_row .= sprintf  "\t%.4g",$cells[4];
    if ($xdf){
	$out_row .= sprintf  "\t%.3g",$pxdf{$snp_name};
    }
    else {
	$out_row .= sprintf  "\t%.4g",$cells[5];
    }
    if (exists $ngt{$snp_name}) {
	$out_row .= "\t".$ngt{$snp_name};
    }
    else {
	$out_row .= "\t-";
    }

    $out_row .= "\t".$cells[6];
    $out_row .= "\t".$cells[7];
#    $out_row .= "\t".$cells[8];
    $out_row .= "\t".$cells[9];
    $out_row .= "\t".$cells[10];
    $out_row .= "\t".$nca{$snp_name};
    $out_row .= "\t".$nco{$snp_name};
    my $neff_str = sprintf "%.2f", $neff{$snp_name};
    $out_row .= "\t".$neff_str;

    my $neff_ex = 0;
    unless ($nofilter) {
	unless ($no_neff_filter) {
	    if ($neff_str < $neff_max_half) {
		if ($neff_ex_n < 10){
		    print "Warning: exclude $snp_name because of low Neff" if ($debug);
		    print "\t$neff_str\n" if ($debug);
		    #		sleep (3);
		}
		if ($neff_ex_n == 10){
		    print "more exclusions not printing\n" if ($debug);
		}	
		$neff_ex = 1;
		$neff_ex_n++;
	    }
	}
    }
#    for my $i (6..10) {
#	$out_row .= "\t".$cells[$i];
#    }
#    $out_row .= sprintf  "\t%.3g",$pxdf{$snp_name};
#    $out_row .= "\t".$nmet{$snp_name};


    if ($detout){
	$out_row .= "\t".$det_str{$snp_name};
    }
    $out_row .= "\n";
    unless ($neff_ex) {
	print DANER "$out_row";
    }


}
close META;
close DANER;
&mysystem ("gzip -f $out_name.metadaner.tmp");
&mysystem ("mv $out_name.metadaner.tmp.gz $out_name.metadaner.gz");
&mysystem ("rm $out_name.meta1.tbl");

print "cleaning up\n" if ($debug);
&mysystem ("rm -rf $workdir");

