#!/usr/bin/perl
## CONVERTS PHASED IMPUTE2 OUTPUT TO CHROMOPAINTER-STYLE INPUT FILES

### AUTHOR: Daniel Lawson (dan.lawson@bristol.ac.uk)
### See "help" for details (run with no options)
### Copyright 2013 Daniel Lawson
### LICENCE: GPLv3
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.

#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.


sub help {
print("CONVERTS PHASE FORMAT TO IMPUTE2/SHAPEIT HAPS FILES\n");
print("   copyright Daniel Lawson (dan.lawson@bristol.ac.uk) 2013, released as GPLv3\n");
print("usage:   perl chromopainter2impute2.pl <options> input.phase output.haps\n");

print("where:\n");
print("        (i) input.phase EITHER phase style or chromopainter format (i.e. containing an extra header line)\n");
print("        (ii) output.haps = impute2 output file\n\n");

print("<options>:\n");
print("-q               Quiet mode.\n");
print("-h               This help file.\n\n");
print("WARNING: No attempt is made to check that a valid file is provided. Check the output matches what you expect! Use at your own risk! It can also be memory intensive.\n");

die "\n";
}

use Switch;

sub trim($){  # remove whitespace from beginning and end of the argument
        my $string = shift;
        $string =~ s/^\s+//;
        $string =~ s/\s+$//;
        return $string;
}



###############################
## INPUT:



use Switch;
use Getopt::Long;
use strict;

###############################
## ARGUMENT PROCESSING

my $quiet=0;
my $verbose=1;
my $help=0;

my @posvec; # location of ALL snps


my $totalINDS=-1; # total number of individuals in the file
my $Mb = 1000000.0;

my $infile="";
my $outfile="";

GetOptions ('h|help' => \$help, 
	    'q|quiet' => \$quiet);

$verbose=1-$quiet;

if($help) {help();}
if(scalar(@ARGV)==0) {help();}
if(scalar(@ARGV)!=2) {
    print "Incorrect number of arguments!\n";
    help();
}
my $fileon=0;
$infile="$ARGV[$fileon]"; ++$fileon;
$outfile="$ARGV[$fileon]"; ++$fileon;


##############################
## PROGRAM:

   ## (I) GET RECOM-RATE INFO:

my $line;
my @linearray; # temporary array of the current line
my @genomat=();

   
   ## (II) GET NUMBER OF SITES AND INDS: 
if($verbose){ print("Reading Chromopainter format file $infile...\n");}
open IN,"$infile" or die "Could not open input file $infile\n";
#open OUT,"$outfile", ">" or die "Could not open output file $outfile\n";
open OUT, ">", $outfile or die $!;

my $ninds=0;
my $nhaps=0;
my $nsnps=0;
my $ploidy=2;
## read the PHASEFILE header
my $skip=1;
my @tmarr;
my $detectv2=1;
while ($skip) {
	my $tmp=<IN>;
	my @tmpvals = split(/ /, $tmp);
	if($tmpvals[0] eq "P"){ # found the line with all the SNP locations
	    $tmp=trim($tmp);
		@posvec= split(/ /, $tmp);
		shift @posvec;
		my $floc=tell(PHASEFILE);
#		$tmp=<IN>; # read the line of S's, if it exists
		if(substr($tmp, 0, 1) eq "S"){
		    $detectv2=0;
		}
		$nsnps=trim(pop @tmarr);
		$ninds=trim(pop @tmarr);
		$nhaps=$ninds*$ploidy;
		$skip=0;
	}else {
		push @tmarr, $tmpvals[0];
	}
}
if($detectv2==0){
    print "Detected Chromopainter v1 format\n";
    print "Detected $ninds individuals\n";
    $line=<IN>; # S line
}else{
    print "Detected Chromopainter v2 format\n";
    print "Detected $ninds haplotypes\n";
}
print "And $nsnps SNPs\n";


if($verbose){ 
    print("Read INDS: $ninds\n");
    print("Read SNPS: $nsnps\n");
}
my $nhaps=0;
    while(<IN>)
    {
	$line=trim($_);
	@linearray=split(//,$line);
	for (my $i=0; $i < $nsnps; $i+=1)
	{
	    $genomat[$nhaps][$i]=$linearray[$i];
	}
	$nhaps++;
    }

if($verbose){ 
    print("Read $nhaps haplotype lines\n");
    my $tploidy=$nhaps/$ninds;
    print("This implies a ploidy of $tploidy\n");
}
                                ## print out:	
   for (my $i=0; $i < $nsnps; $i+=1)
   {
       print OUT "0:$posvec[$i] 0:$posvec[$i] $posvec[$i] A C";
       for (my $j=0; $j < $nhaps; $j+=1)
       {
	   print OUT " $genomat[$j][$i]";
       }
       print OUT "\n";
   }

close IN;
close OUT;
