#!/usr/bin/env perl
use strict;

my $version = "3.0.1";
my $progname = $0;
$progname =~ s!^.*/!!;

#############################
# load utility functions
#############################

use FindBin;
use lib "$FindBin::Bin";
use Ricopili::Utils qw(trans);

#############################
# read config file
#############################

my $rloc = &trans("rloc");
my $sloc = &trans("sloc");
#my $home = &trans("home");

########################
## read study names

my %stuna ;
#print "study.names not found in homedir\n" unless open FILE, "< $home/study.names";
#while (my $line = <FILE>){
#    my @cells = split /\s+/, $line;
#    $stuna{$cells[0]} = $cells[1];
#}
#close FILE;

########################


my $r_sys = "$rloc";
$r_sys =~ s/_SPACE_/ /g;

system("$r_sys RHOME > /dev/null");
my $status = ($? >> 8);
if ($status != 0) {
    print "I could not start R, maybe you are using an old ricopili configuration?\ne.g. rloc does not need a path but a full starting command.\n";
    exit;
}



#############################
# test, if running on server
#############################
use Sys::Hostname;
my $host = hostname();
#my $lisa=0;
#$lisa=1 if ($host =~ m/sara/) ;
#my $broad = 1 if ($ENV{"DOMAINNAME"} =~ /broadinstitute/);


#my $rloc = "/home/radon01/sripke/bakker_ripke/R-base";
#my $rloc_lisa = "/home/gwas/bin/R-base";

my $last_th = 0.01;

my $pcol=4;
my $poscol=3;
my $chrcol=1;
my $snpcol=2;
my $pth=1.0e-04; ## from here thin them down
my $ceiling=10e-200; # log-ceiling

my $title="";
my $ndots=5000;

my $expar=10000;

my $fac=3000;  # the highest 50 ones for sure, rest randomly

##### help message
my $usage = "
Usage : $progname [options] p-file

version: $version

  --title STRING title of plot, default= filename
  --top INT      number of top-values to mark
  --chr INT      plot one chromosome
  --cp STRING    plot all chromosomes that appear in arafile STRING
  --reg INT.INT  region (beginning and end, separated by commma)
  --check        check columns with sample output
  --cols STRING  combined column-string, separated by commas, overwrites other options
                     SNPCOL,PCOL,CHRCOL,POSCOL
  --areas STRING area surrounding a SNP
  --expar INT    expanding area of SNP (in KB), default=$expar
  --ceiling INT  ceiling for 10e-(INT)
  --maxy    INT  yaxis exactly here
  --sig-gwa      add gwa-singificance level 5x 10e-8
  --genef STR    name of file containing gene-reference
  --nolog        print p-vlues as they stand there

  --pth FLOAT    threshold, from where thinning down
  --last FLOAT   threshold, from where no p-values
  --help         print this message and exit  

  --fac INT      use these many as minimum, then scale down
                     default: $fac

  --prekno FILE  pre-reported regions, outside GWsig will be colored blue

  --gc FLOAT     genomic control with FLOAT = lsmbda

  --debug        extended output

 created by Stephan Ripke 2008 at MGH, Boston, MA
 
 pfile will be sorted for pvalue if nercessary, iv you do it in advance you save time and money

";
use File::Path;
use Cwd;
my $rootdir = &Cwd::cwd();


#### evaluate options
my $areafile = "";
my $lambda = 1.0;

use Getopt::Long;
GetOptions( 
    "title=s"=> \$title,
    "top=i"=> \my $ntop,
    "chr=i"=> \my $chr,
    "cp=s"=> \$areafile,

    "bp"=> \my $bpaxis,
    "check"=> \my $check,
    "cols=s"=> \my $colstr,
    "reg=s"=> \my $regstr,
    "areas=s"=> \my $areas,
    "genef=s"=> \my $gene_file,
    "expar=i"=> \$expar,
    "pth=f"=> \$pth,
    "last=f"=> \$last_th,
    "fac=i"=> \$fac,
    "debug"=> \my $debug,

    "ceiling=i"=> \my $ceiling_sw,
    "maxy=i"=> \my $maxy_sw,
    "sig-gwa"=> \my $sig_sw,
    "nolog"=> \my $nolog,
    "help"=> \my $help,
    "prekno=s"=> \my $prekno_file,
    "gc=f"=> \$lambda,

    );


die "$usage\n" if @ARGV ne 1 || $help;


my $r_silent = "> /dev/null 2>&1";
if ($debug) {
    $r_silent = "";
}

my $prekno_th = 5.0e-08;


$ceiling = 10**(-$ceiling_sw) if ($ceiling_sw);
my $maxy = 0;
$maxy = 10**(-$maxy_sw) if ($maxy_sw);

my $pfile=$ARGV[0];


my $workdir = "$sloc/gwa_$title";

while (-e $workdir) {
    $workdir .= ".g";
}

my @created = mkpath(   ## $created ?
			"$workdir",
			{verbose => 0, mode => 0750},
    );



chdir ($workdir);

&mysystem ("cp $rootdir/$pfile .");


my @prekno_arr;

if ($prekno_file) {
    &mysystem ("cp $rootdir/$prekno_file .");

    die "$prekno_file not existing" unless open PF, "< $prekno_file";
    while (my $line =<PF>){
	my @cells = &split_line($line);
	my $str_tmp = sprintf "%d\t%d\t%d",$cells[1],$cells[2]*1000000,$cells[3]*1000000;
	push @prekno_arr, $str_tmp;
#	print "$str_tmp\t$line\n";
    }
    close PF;

}
#exit;


if ($pfile =~ /.gz$/){
    &mysystem ("gunzip -f $pfile");
    $pfile =~ s/.gz$//;
}
#print $ceiling."\n";
#exit;

($snpcol,$pcol,$chrcol,$poscol)= split ',', $colstr if ($colstr);

(my $regbeg,my $regend)= split ',', $regstr if $regstr;

my $genbeg=-1000;
my $genend=-1000;

unless ($chr){
    die "please specify chromosome" if ($regstr);
}

$regbeg *= 1000;
$regend *= 1000;
#print "$regbeg\t$regend\n";
#exit;





my $outdir=$pfile."_tmp_gwa_plot";
my $top5Kfile=$pfile.".top5K";
my $positionfile=$pfile.".pos";
my $refgene_file="refGene_processed.txt";

my $pdfout = $pfile."_gwa.pdf" if ($title eq "");
$pdfout = $title."_gwa.pdf" unless ($title eq "");
$title = $pfile."_GWA" if ($title eq "") ; 

foreach my $sn (keys %stuna) {
    $title = $stuna{$sn} if ($title =~ /$sn/);
}



use File::Path;
my @created = mkpath(   ## $created ?
    "$outdir",
    {verbose => 0, mode => 0750},
    );


#####################################
# grep analog
####################################

sub greppi {
    my ($expr, $file)=@_;
    my @lc=();
    die $! unless open FILE, "< $file";
    while (<FILE>){
	push @lc, $_ if ($_ =~ "$expr");
    }
    close FILE;
    @lc;
}


##########################################
# subroutine to split a plink-output-line
##########################################

sub split_line {
    my ($line)=@_;
    chomp($line);
    $line =~ s/^[\s]+//g;
    my @cols=  split /\s+/, $line;
}


###################################################
###  system call with test if successfull
###################################################
sub mysystem(){
    my ($systemstr)="@_";
    system($systemstr);
    my $status = ($? >> 8);
    die "$systemstr\n->system call failed: $status" if ($status != 0);
}


#####################################
# print array to file
####################################

sub a2file {
    my ($file, @lines)=@_;
    die $! unless open FILE, "> $file";
    foreach (@lines){
	print FILE $_;
    }
    close FILE;
}


#####################################
# subroutine to count lines of a file
#####################################

sub count_lines {
    my ($file)=@_;
    my $lc=0;
    die "$file: ".$! unless open FILE, "< $file";
    while (<FILE>){
	$lc++;
    }
    close FILE;
    $lc;
}


###   here preparation of ucsc-file: 
###   sed  -r '/chr[0-9XY]+_.*/d'  refGene.txt | sed 's/chrX/chr23/' | sed 's/chrY/chr24/' | sed 's/chr//' |cut -f3,5,6,13 | sort | uniq > refGene_processed.txt


#&mysystem ("use R-2.10") if ($broad);

####################################
################################
#######BEGIN
#################################
###################################

######################################
##  if cols are not sure, check them
######################################
my $count_p=0;
if ($check){
    print "$usage\n";
    print "SNPCOL\tPCOL\tCHRCOL\tPOSCOL\n";
    die $! unless open PF , "< $pfile";
    while ($count_p++ < 5){
	last unless (my $line=<PF>);
	my @cells = &split_line($line);
	printf "%s\t%s\t%s\t%s\n",$cells[$snpcol-1],$cells[$pcol-1],$cells[$chrcol-1],$cells[$poscol-1];
    }
    close PF;
    print "\n";
    print "\nHeader of original File:\n";
    die $! unless open PF , "< $pfile";
    my $line=<PF>;
    my @cells = &split_line($line);
    $count_p=0;
    foreach (@cells){
	$count_p++;
	printf "$count_p\t$_\n";
    }
    close PF;
    exit 1;
}




#############################
##   check sorting
#############################

#print "checking sorting\n";
my $systemcheck="sort -k$pcol,$pcol"."g"." -c $pfile 2> /dev/null";
system ($systemcheck);
my $status = ($? >> 8);
if ($status != 0){

    print "pfile not sorted, will do it for you\n" if ($debug);
    my $systemsort="head -1 $pfile > sorted_$pfile; tail -n +2 $pfile | sort -k$pcol,$pcol"."g"." >> sorted_$pfile";
    &mysystem ($systemsort);
    $pfile = "sorted_$pfile";
}





###############################
##   areas surrounding SNP
##############################


if ($areas){


    my ($line) = &greppi($areas,$pfile);
    my @cells = &split_line($line);
    $regbeg = $cells[$poscol-1] - $expar*1000;
    $regend = $cells[$poscol-1] + $expar*1000;
    $genbeg = $cells[$poscol-1] - 200*1000;
    $genend = $cells[$poscol-1] + 200*1000;

    $chr = $cells[$chrcol-1];
    $regstr="on";
#    print "$line\n";
#    exit;

#print "$regbeg\t$regend\t$cells[$poscol-1]\n";
#exit;

}


################################
##  get gene list
###############################
my %gene_list=();
my $gene_list_str;
if ($gene_file){
    die "gene-list only only with area-SNP" unless $areas;

    die $! unless open GENE , "< $gene_file";


    while (<GENE>){
	chomp;
	my @cells = &split_line($_);
	if ($chr == $cells[0]){

#	    if ($cells[1] <= $regbeg){
#		$gene_list{$cells[3]}++ if ($cells[2] > $regend);
#	    }
#	    if ($cells[1] > $regbeg){
#		$gene_list{$cells[3]}++ if ($cells[2] < $regend);
#	    }


	    if ($cells[1] <= $genbeg){
		if ($cells[2] > $genbeg){
		    $gene_list{$cells[3]}++;
#		    print "$_\t$cells[1]\t$genend\n";
		}
	    }


	    if ($cells[1] > $genbeg){
		if ($cells[1] <= $genend){
		    $gene_list{$cells[3]}++ if ($cells[1] <= $genend);
#		    print "$_\t$cells[1]\t$genend\n";
		}
	    }
	}
    }

    close GENE;

#    print "$genbeg\t$genend\n";
    foreach (sort keys %gene_list){
	$gene_list_str .= "$_ ";
    }

}

my $nsnps = &count_lines ($pfile);


################################
##  prep Best-File
###############################
srand(0);
#print "prep Best File\n";


my $lc=0;
##################!!!!!!

die $! unless open PF , "< $pfile";
die $! unless open T5 , "> $outdir/$top5Kfile";
$count_p=0;
my $header= <PF>;
#while ($count_p++ < $ndots){

$last_th = $pth if ($pth > $last_th);
my $rth=1;
my $pc = 0;
my $smallest_p = 2;
my $smallest_p_count = 0;

#print "pth: $pth\n";
#print "fac: $fac\n";
#print "lc: $lc\n";
while (1){
    last unless (my $line=<PF>);



    my @cells = &split_line($line);

    next if ($cells[$chrcol-1] < 1 || $cells[$chrcol-1] > 30);

    next if ($cells[$pcol-1] eq "NA");



    $lc++; #########!!
    $rth -= 1/$ndots;
    if (rand() > $fac/($lc) && $cells[$pcol-1] > $pth){
#	print "pth: $pth\n";
#	print "p: $cells[$pcol-1]\n";
#	print "fac: $fac\n";
#	print "lc: $lc\n";
	next;
#	exit;
    }; ###############!!
#    next if (rand() > $rth+1 && $cells[$pcol-1] > $pth); ###############!!



    last if ( $cells[$pcol-1] > $last_th);
    next if ($cells[$pcol-1] eq "NA" || $cells[$pcol-1] == 0);
    $smallest_p = $cells[$pcol-1] if ($smallest_p_count == 10);
    $smallest_p_count++ ;


#    print "$cells[$pcol-1]\n";
    unless ($nolog) {
	$cells[$pcol-1]=$ceiling if ($cells[$pcol-1] < $ceiling);
    }
    if ($chr){
	unless ($cells[$chrcol-1] == $chr){
	    $count_p--;
	    next;
	}
	if ($regstr){
	    unless ($cells[$poscol-1] > $regbeg && $cells[$poscol-1] < $regend){
		$count_p--;
		next;
	    }
	}
    }
    printf T5 "%s\t%s\t%s\t%s\n",$cells[$snpcol-1],$cells[$pcol-1],$cells[$chrcol-1],$cells[$poscol-1];

    $pc++;



#	print "$cells[0]\n";
}
close PF;
close T5;

#print "smallest: $smallest_p\n";
#my $smallest_p1 = log($smallest_p);
if ($smallest_p > $prekno_th) {
    my $smallest_p1 = sprintf "%i",log($smallest_p)/log(10);
    $prekno_th = 10 ** ($smallest_p1);
}
#print "smallest: $smallest_p1\n";
#print "smallest: $prekno_th\n";
#exit;

die "no values left" if ($pc == 0);

my $systempos="sort -k3,3n -k4,4n $outdir/$top5Kfile > $outdir/$positionfile";
&mysystem ($systempos);


## create Best_processed
die $! unless open IN , "< $outdir/$positionfile";
my $old_chr=0;
my $old_pos=0;
my $new_pos=0;
my $chrcol=2;
my $poscol=3;
my $snpcol=0;
my $pcol=1;
my @out_lines=();
my @prekno_new=();
while (<IN>){
    chomp;
    my @cells = &split_line($_);
    if ($cells[$chrcol] != $old_chr){
	$old_chr = $cells[$chrcol];
	$old_pos = $new_pos;
    }
    $new_pos = $old_pos + $cells[$poscol];	
    my $linestr=$cells[$snpcol];
    $linestr.="\t$cells[$chrcol]";
    $linestr.="\t$cells[$poscol]";
    $linestr.="\t$cells[$pcol]";
    $linestr.="\t$cells[$chrcol]";
    $linestr.="\t$new_pos";


    my $new = 0;
    if ($cells[$pcol] < $prekno_th){
	my $found = 0;
	foreach my $reg (@prekno_arr) {	
	    my ($chr_prekno, $start_prekno, $end_prekno) = &split_line($reg);
	    if ($chr_prekno == $cells[$chrcol]){
		if ($start_prekno < $cells[$poscol] && $end_prekno > $cells[$poscol]){
		    $found = 1;
		}
#		else {
#		    $new = 1;
#		}
	    }
	}
	$new = 1 if ($found == 0);
	
#	if ($new == 0) {
#	    print "prekno: $linestr" ;
#	}
#	else {
#	    print "new: $linestr" ;
#	}
    }
    $linestr.="\t$new\n";
    push @out_lines, $linestr;
    push @prekno_new, $linestr if ($new == 1);

}
close IN;

&a2file ( "$outdir/Best_processed.txt_tmp" , @out_lines);
&a2file ( "$outdir/prekno_new" , @prekno_new);


#exit;



##############################
##    prep ticks file
#############################

@out_lines=();
$old_chr=0;
die $! unless open IN , "< $outdir/Best_processed.txt_tmp";
my $mean_pos;
my $lc=0;
my @cells = ();
my $linestr;
while (<IN>){
    chomp;
    @cells = &split_line($_);
    if ($lc++ == 0){
	$old_pos=$cells[$#cells];
	$new_pos=$old_pos;
	
    }
    if ($cells[1] != $old_chr){
	$new_pos=$cells[5];
#	$new_pos=$cells[$#cells];
	$mean_pos=($old_pos+$new_pos)/2;
	$old_chr=$cells[1];
	$old_pos=$cells[5];
#	$old_pos=$cells[$#cells];
	
	$linestr=$cells[1];
	$linestr.="\t$new_pos";
	$linestr.="\t$mean_pos\n";
	push @out_lines, $linestr;
    }
    
}

$linestr=$cells[1];
$new_pos=$cells[5];
#$new_pos=$cells[$#cells];
$mean_pos=($old_pos+$new_pos)/2;
$linestr.="\t$new_pos";
$linestr.="\t$mean_pos\n";
push @out_lines, $linestr;

close IN;

&a2file ( "$outdir/ticks.txt_tmp" , @out_lines);

#print "@out_lines\n";
#exit;





######################################################################
##   R templates
######################################################################

    my $R_templ='
pdf("OUTNAME",title="GWA-Plot",PLOTSHAPE)



## png("OUTNAME.png",width = 11, height = 7.5, units="in",res=300)

read.table ("TICKNAME",header=F)-> pos 
read.table ("INNAME",header=F)-> asnp

ymin = min(asnp[,4]);

if (MAXY != 0){
  ymin = MAXY;
}

asnp[,"pch"]=19
asnp[asnp[,4]<PTHRESH,"pch"]=21


if (LOG){
plot ( 0,0, type="n",  ylim = c(-log10(max(asnp[,4])),-log10(ymin)), xlim = c(min(asnp[,6]),max(asnp[,6])), axes=F, xlab="", ylab="-log10(p_val)", main="TITLE", col=colors()[100], sub=paste("(n=",length(asnp[,4])," out of NUMBERSNPS)",sep=""), cex.sub=.8, cex.lab = 1.2)
abline (h=-log10(PTHRESH),col="grey",lwd=.5)
} else {
plot ( 0,0, type="n",  ylim = c(min(asnp[,4]),max(asnp[,4])), xlim = c(min(asnp[,6]),max(asnp[,6])), axes=F, xlab="", ylab="-log10(p_val)", main="TITLE", col=colors()[100], sub=paste("(n=",length(asnp[,4]),")",sep=""), cex.sub=.8, cex.lab = 1.2)
abline (h=PTHRESH,col="grey",lwd=.5)
}

if (asnp[1,2]%%2==1){
col1="black"
col2=colors()[100]
}else{
col2="black"
col1=colors()[100]
}


if (0) {
  liz_col = c("black","orange","green","blue","cyan","magenta","yellow","red","black","orange","green","blue","cyan","magenta","yellow","red","black","orange","green","blue","cyan","magenta","yellow","red","black","orange","green","blue","cyan","magenta","yellow","red")
  points (x=asnp[,6],y=-log10(asnp[,4]), col=liz_col[asnp[,2]], cex=0.8, pch=19 ) 


} else {
 if (LOG){


  asnp[,"gc"] = pchisq(qchisq(asnp[,4],1,lower.tail=F)/LAMBDA,1, lower.tail=F)


#print (head (asnp))
snp_out = asnp[ asnp[,"gc"]<PTHRESH , c("V1","V2","V3","V4","V7","gc")]
print (head (snp_out))


write.table (snp_out, file="OUTNAME.prekno.gc", row.names=F, quote=F)

#  asnp[,"gc"] = 1-pchisq(qchisq(1 - asnp[,4],1)/LAMBDA,1)

  points (x=asnp[asnp[,2]%%2==1,6],y=-log10(asnp[asnp[,2]%%2==1,"gc"]), col=col1, cex=0.5, pch=asnp[asnp[,2]%%2==1,"pch"] ) 
  points (x=asnp[asnp[,2]%%2==0,6],y=-log10(asnp[asnp[,2]%%2==0,"gc"]), col=col2, cex=0.5, pch=asnp[asnp[,2]%%2==0,"pch"])
  points (x=asnp[asnp[,4]==MAXY,6],y=-log10(asnp[asnp[,4]==MAXY,"gc"]), col="green", cex=0.8)
  points (x=asnp[asnp[,7] == 1,6],y=-log10(asnp[asnp[,7] == 1,"gc"]), col="blue", cex=1.2)



 } else {
  points (x=asnp[asnp[,2]%%2==1,6],y=asnp[asnp[,2]%%2==1,4], col=col1, cex=0.5)
  points (x=asnp[asnp[,2]%%2==0,6],y=asnp[asnp[,2]%%2==0,4], col=col2, cex=0.5)
 }
}

#print (asnp[asnp[,7] == 1,])

mtext("Chr / Position(kb)",1,3, cex=1.2)
#mtext("ripke @ chgr mgh harvard edu",4,cex=.3)

mtext("GENELIST",3,3, cex=0.3)

if (GENBEG != -1000){
abline (v=GENBEG,col=colors()[100])
abline (v=GENEND,col=colors()[100])
}

if (SIGGWA == 1){
abline (h=7.30103,col=colors()[100])


}

if (LOG) {
 ticks = round(seq.int(from = floor(min(-log10(asnp[,4]))), to = ceiling(-log10(ymin)), length.out =4))
#   axis(2,floor(min(-log10(asnp[,4]))):ceiling(-log10(ymin)), cex.axis=0.4, las=1, labels=ticks)
 print (ticks)
   axis(2, cex.axis=1.2, las=1, at= ticks,
   ,labels = ticks, lty= "solid")

} else {
axis(2,floor(min(asnp[,4])):ceiling(max(asnp[,4])), cex.axis=1.2, las =1)
}


axis(1, at= pos[,2], lab=F, lwd=3, cex.axis=1.2)
axis(1, at= pos[-1,3], lab=pos[1:(length(pos[,1])-1),1], lwd=3, cex.axis=.6, line=-.5, tick=F)
axis(1, at= seq(min(asnp[,6]),max(asnp[,6]),length=BPAX), lab = trunc(seq(min(asnp[,6]),max(asnp[,6]),length=BPAX)/(10^3)), las=1, line = 2, cex.axis=1.2, lty=3)

BESTSNPS

dev.off()

CHRPLOT
';


my $R_templ_cp ='


#############################    Chromosome plot

pdf("Chromosomeplot%03d.pdf",7.8,6,onefile=F);

read.table ("AREANAME",header=F)-> are   ### areas

ymax = -log10(min(asnp[,4]))


drawchrom <- function (ymax_chr) {

	    plot ( 0,0, type="n",  
		   ylim = c(-log10(max(asnp[,4])),ymax_chr), 
		   xlim = c(min(asnp[,3]),xmax), 
		   axes=F, 
		   xlab="", ylab="-log10(p_val)", 
		   main=paste ("TITLE, Chr. ",chr,", (n=",length(asnp[asnp[,2]==chr,4]),")",sep=""), col=colors()[100], 
		   cex.lab = 0.6,cex.main=.6)
	    
# here the area-makrs
	    if (chr %in% are[,1]){
		for (x in 1:dim(are)[1]){
		    if (are[x,1] == chr){
			abline(v=are[x,2],col=colors()[are[x,4]], lwd = .6)
			abline(v=are[x,3],col=colors()[are[x,4]], lwd = .6)
			if (are[x,5] == "OWNAREA"){
			    text(x=are[x,2], y=ymax_chr, pos=2 , srt=90, col=colors()[are[x,4]], labels = are[x,5], cex=.6);
			}
			else {
			    text(x=are[x,3], y=ymax_chr, pos=4 , srt=270, col=colors()[are[x,4]], labels = are[x,5], cex=.6);
			}
		    }
		}
	    }
	    mtext("ripke @ chgr mgh harvard edu",4,cex=.3)
	    
	    col1="black"
	    
	    points (x=asnp[asnp[,2]==chr,3],y=-log10(asnp[asnp[,2]==chr,4]), col=col1, cex=0.5)
	    xrange = trunc ((max(asnp[asnp[,2]==chr,3]) -  min(asnp[asnp[,2]==chr,3]))/1000);
	    mtext(paste ("Position, ",xrange," KB",sep = ""),side=1,line=3, cex=0.5,adj=max(asnp[asnp[,2]==chr,3])/xmax*0.5)

	    ## left axis
	    axis(2,floor(min(-log10(asnp[,4]))):ceiling(ymax_chr), cex.axis=0.4) ## left axis
	    axis(1, at= seq(min(asnp[asnp[,2]==chr,3]),max(asnp[asnp[,2]==chr,3]),length=BPAX), lab = trunc(seq(min(asnp[asnp[,2]==chr,3]),max(asnp[asnp[,2]==chr,3]),length=BPAX)/(10^3)), las=3, line = 0, cex.axis=0.5, lty=1)
	    

}



    
    par(mfrow = c(2, 1))
    
    xmax = max(asnp[,3]);
#ymax = -log10(min(asnp[,4]))
    
    for (chr in 1:25){
	
	if (length(asnp[asnp[,2]==chr,3]) > 5){
	    
	    if (chr == 23) {
		xmax=xmax*2
	    }

            if (chr %in% are[,1]) {
         	    drawchrom(9)

	            ymax_local = -log10(min(asnp[asnp[,2]==chr,4]))
	            if (ymax_local > 9 ){
	        	drawchrom(ymax_local)
	            }
            }

	    if (chr == 12) {
		par(mfrow = c(2, 2))
		xmax=xmax/2;
	    }
	    
	    if (chr == 22) {
		par(mfrow = c(2, 1))
                #    layout(matrix(c(1,2,3,4,5,5),3,2,byrow=T),heights=c(.33,.33,.33))
	    }
	    
	}
	
    }
dev.off();


';


## single chromosomes
    my $R_templ_nouse='
pdf("OUTNAME",6,7.8,title="GWA-Plot")
par(mfrow = c(4, 1))

read.table ("TICKNAME",header=F)-> pos 
read.table ("INNAME",header=F)-> a


plot ( 0,0, type="n",  ylim = c(-log10(max(asnp[,4])),-log10(min(asnp[,4]))), xlim = c(min(asnp[,6]),max(asnp[,6])), axes=F, xlab="", ylab="-log10(p_val)", main="TITLE", col=colors()[100], sub=paste("(n=",length(asnp[,4]),")",sep=""), cex.sub=0.4, cex.lab = 1.0)

if (asnp[1,2]%%2==1){
col1="black"
col2=colors()[100]
}else{
col2="black"
col1=colors()[100]
}

#points (x=asnp[asnp[,2]%%2==1,6],y=-log10(asnp[asnp[,2]%%2==1,4]), col=col1, cex=0.5)
#points (x=asnp[asnp[,2]%%2==0,6],y=-log10(asnp[asnp[,2]%%2==0,4]), col=col2, cex=0.5)
points (x=asnp[asnp[,2]%%2==1,6],y=(asnp[asnp[,2]%%2==1,4]), col=col1, cex=0.5)
points (x=asnp[asnp[,2]%%2==0,6],y=(asnp[asnp[,2]%%2==0,4]), col=col2, cex=0.5)


mtext("Chromosome/Position(KB)",1,3, cex=0.6)
#mtext("ripke @ chgr mgh harvard edu",4,cex=.3)

mtext("GENELIST",3,3, cex=0.3)
abline (v=GENBEG,col=colors()[100])
abline (v=GENEND,col=colors()[100])

axis(2,floor(min(-log10(asnp[,4]))):ceiling(max(-log10(asnp[,4]))), cex.axis=0.4)

axis(1, at= pos[,2], lab=F, lwd=3, cex.axis=1.2)
axis(1, at= pos[-1,3], lab=pos[1:(length(pos[,1])-1),1], lwd=3, cex.axis=1.2, line=-.5, tick=F)
axis(1, at= seq(min(asnp[,6]),max(asnp[,6]),length=BPAX), lab = trunc(seq(min(asnp[,6]),max(asnp[,6]),length=BPAX)/(10^3)), las=3, line = 2, cex.axis=1.2, lty=3)

BESTSNPS


par(mfrow = c(3, 1))

for (chr in 1:22){

plot ( 0,0, type="n",  ylim = c(-log10(max(asnp[,4])),-log10(min(asnp[,4]))), xlim = c(min(asnp[asnp[,2]==chr,6]),max(asnp[asnp[,2]==chr,6])), axes=F, xlab="", ylab="-log10(p_val)", main=paste ("TITLE, Chr",chr,sep=""), col=colors()[100], sub=paste("(n=",length(asnp[asnp[,2]==chr,4]),")",sep=""), cex.sub=0.4, cex.lab = 0.6)

points (x=asnp[asnp[,2]==chr,6],y=-log10(asnp[asnp[,2]==chr,4]), col="black", cex=0.5)

mtext("Chromosome/Position(KB)",1,3, cex=0.6)
#mtext("ripke @ chgr mgh harvard edu",4,cex=.3)

#mtext("GENELIST",3,3, cex=0.3)
#abline (v=GENBEG,col=colors()[100])
#abline (v=GENEND,col=colors()[100])

axis(2,floor(min(-log10(asnp[,4]))):ceiling(max(-log10(asnp[,4]))), cex.axis=0.4)

axis(1, at= pos[,2], lab=F, lwd=3, cex.axis=0.4)
#axis(1, at= pos[-1,3], lab=pos[1:(length(pos[,1])-1),1], lwd=3, cex.axis=0.4, line=-.5, tick=F)
#axis(1, at= seq(min(asnp[chr,6]),max(asnp[chr,6]),length=BPAX), lab = trunc(seq(min(asnp[chr,6]),max(asnp[chr,6]),length=BPAX)/(10^3)), las=3, line = 2, cex.axis=0.4, lty=3)

#BESTSNPS
}





dev.off()
';



my $best_num_str='
asnp[order(asnp[,4]),][1:BESTNUM,] -> best_snps
text (x=best_snps[,6],y=-log10(best_snps[,4]),best_snps[,1], pos=4 , cex=0.6)
points (x=best_snps[,6],y=-log10(best_snps[,4]), pch=19, col=colors()[497], cex=0.6)" 
';




###################################
##    preparate template
###################################

$R_templ_cp =~ s/AREANAME/$areafile/g ;
$R_templ =~ s/CHRPLOT/$R_templ_cp/g if ($areafile ne "");
$R_templ =~ s/CHRPLOT//g if ($areafile eq "");

$R_templ =~ s/NUMBERSNPS/$nsnps/g;
$R_templ =~ s/PTHRESH/$pth/g;
$R_templ =~ s/OUTNAME/$pdfout/g;
$R_templ =~ s/TICKNAME/$outdir\/ticks.txt_tmp/g;
$R_templ =~ s/INNAME/$outdir\/Best_processed.txt_tmp/g;
$R_templ =~ s/TITLE/$title/g;
$R_templ =~ s/BPAX/2/g unless $chr;
if ($chr){
    $R_templ =~ s/BPAX/6/g unless $regstr;
    $R_templ =~ s/BPAX/4/g if $regstr;
}
$R_templ =~ s/PLOTSHAPE/6,6/g if $regstr;
$R_templ =~ s/PLOTSHAPE/8.7,6/g unless $regstr;
if ($ntop){
    $best_num_str =~ s/BESTNUM/$ntop/g;
    $R_templ =~ s/BESTSNPS/$best_num_str/g;
}
else {
    $R_templ =~ s/BESTSNPS//g;
}

$R_templ =~ s/GENELIST/$gene_list_str/g;
$R_templ =~ s/GENBEG/$genbeg/g;
$R_templ =~ s/GENEND/$genend/g;
$R_templ =~ s/SIGGWA/1/g if ($sig_sw);

$R_templ =~ s/SIGGWA/0/g unless ($sig_sw);
$R_templ =~ s/LOG/1/g unless ($nolog);
$R_templ =~ s/LOG/0/g if ($nolog);
$R_templ =~ s/LAMBDA/$lambda/g;

$R_templ =~ s/MAXY/$maxy/g;




#print "$genbeg\n";
#print "$genend\n";

&a2file ( "$outdir/R_GWAplot.in_tmp" , $R_templ);




#######################################
##     start R
#######################################



#my $systemGWA="source /broad/software/scripts/useuse; use R-2.14; R < $outdir/R_GWAplot.in_tmp --vanilla\n";
my $systemGWA="$r_sys < $outdir/R_GWAplot.in_tmp --save --no-restore > $outdir/R_GWA_log.txt 2>&1\n";
&mysystem ($systemGWA);

&mysystem ("shrinkpdf $pdfout");

&mysystem ("cp $pdfout $rootdir");
&mysystem ("cp $outdir/prekno_new $rootdir/$pdfout.prekno_new");
&mysystem ("cp $pdfout.prekno.gc $rootdir/$pdfout.prekno_new.gc");

&mysystem ("tar -czf $pdfout.tar.gz $outdir/R_GWAplot.in_tmp $outdir/ticks.txt_tmp $outdir/Best_processed.txt_tmp");
&mysystem ("cp $pdfout.tar.gz $rootdir");
&mysystem ("rm -rf  $workdir");

print "$pdfout\n\n" if ($debug);



#here plot for snps per chromosome

#> plot(h,main=paste ("SNPs per Chromosome (sum=", length(a[,2]),")",sep=""),xlab="Chr",ylab="SNP counts",axes=F)
#> axis(1,at=1:22)
#> axis(2)
#> text(x=h$counts,adj=c(1.5,0.5),labels=h$counts,cex=0.7,srt=90)
#> h<-hist(a[,2],breaks=(0:22)+0.5,plot=F)



# much better
#trip<-table(a[,2])
#> rip<-barplot (trip,col=rainbow(22),cex.names=.8,xlab="Chr",main=paste ("SNPs per Chromosome (sum=", length(a[,2]),")",sep=""))
#> text(x=rip,y=0,as.data.frame(trip)[,2],srt=90,adj=c(-0.5,0.5))



#read.table("mdd_stard_eur_QC3.fam")->fam
#barplot(table (fam[,5:6]))
