#include <vector>
#include <string>
#include <cstring>
#include <fstream>
#include <sstream>
#include <iostream>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdexcept>
#include <exception>
#include <iostream>
#include <limits>
#include <string>
#include <algorithm>

/////////////////////////////
// Not cross platform?
#ifdef _OPENMP
#include <omp.h>
#endif

#include <sys/types.h>
#include <sys/stat.h>
/////////////////////////////

#include "cp/ChromoPainterMutEM.h"
#include "chromocombine/ChromoCombine.h"
#include "finestructure/fines.h"
#include "finestructure/fsxml.h"
#include "fssettings.h"
#include "fsutils.h"
#include "fsproject.h"
#include "fsconstants.h"
#include "alphanum.hpp"

using namespace std;


//////////////////////////////////////
// Construct a project with built-in defaults.
//   f       - project settings file name (handed to setFileName)
//   d       - project directory name (handed to setDirectoryName)
//   verbose - whether progress messages are printed
// After the defaults are in place the free-text section headers, the
// parameter table and the command table are built.
// NOTE(review): ninds, nindsUsed and nsnps are not assigned here; confirm
// they are initialised elsewhere before they are first read.
FsProject::FsProject(std::string f,std::string d,bool verbose) 
{
  // --- general run control ---
  exec="fs";
  hpc=0;
  numthreads=0;
  indsperproc=0;
  allowdep=true;
  outputlogfiles=true;
  nstages=5;
  stage=0;
  validatedoutput.assign(5,0);

  // --- per-stage command files start out empty ---
  s1commandfile.clear();
  s2commandfile.clear();
  s3commandfile.clear();
  s4commandfile.clear();

  // --- chromopainter, shared by stage 1 and 2 ---
  ploidy=2;
  linkagemode="unlinked";
  s12args.clear();

  // --- chromopainter stage 1 (EM) ---
  s1args="-in -iM --emfilesonly";
  s1emits=10;       // number of EM iterations
  s1minsnps=10000;  // default: 10K
  s1snpfrac=0.1;    // fraction of genome we will use for EM
  s1indfrac=1.0;    // fraction of inds we will use for EM
  Neinf=-1;         // default: we haven't got estimates
  muinf=-1;         // default: we haven't got estimates

  // --- chromopainter stage 2 (painting) ---
  s2args.clear();
  s2combineargs.clear();
  s2chunksperregion=-1; // default: can be wrong for unlinked data!
  s2samples=0;          // default: we don't want samples
  cval=-1;              // inferred "c" (informational only, set from the files)

  // --- finestructure stage 3 (mcmc) ---
  s34args.clear();
  s3iters=numitersDefault;
  s3iterssample=-1;
  s3itersburnin=-1;
  numskip=-1;
  maxretained=500;
  nummcmcruns=2;
  threshGR=1.3;
  fscounter=0;
  fsmaxattempts=5;

  // --- finestructure stage 4 (tree) ---
  s4iters=numitersDefault;
  s4args.clear();

  // --- identity of the project, then the lookup tables ---
  this->verbose=verbose;
  setFileName(f);
  setDirectoryName(d);
  setupSections();

  defineParameters();
  defineCommands();
}


// Destructor: no explicit cleanup is performed here.
FsProject::~FsProject() {}
///////////////////////////////

// Build the table of project-file parameters, grouped into sections.
//
// Four parallel containers are (re)built:
//   sectionnames[i]    - stage label printed for section i
//   sectioncomments[i] - comment printed at the top of section i
//   pars               - every parameter, in output order
//   parsize[i]         - pars.size() once section i is complete, i.e. the
//                        one-past-the-end index of section i inside pars
// writeToFile() walks these containers to produce the settings file.
// The final "stage" entry is appended after the last parsize entry on
// purpose: it belongs to no section and writeToFile() emits it separately.
//
// All four containers are cleared first so that calling this function more
// than once does not leave stale entries behind in parsize.
//
// NOTE(review): the second FsPar argument looks like the stage a parameter
// belongs to (-1 for universal/derived ones) and the optional fourth argument
// is passed as 1 for entries documented as "Derived." - confirm against FsPar.
void FsProject::defineParameters()
{
  ////// Universal properties
  sectionnames.clear();
  sectioncomments.clear();
  pars.clear();
  parsize.clear();

  sectionnames.push_back("0");
  sectioncomments.push_back("Data preparation and conversion. Not yet implemented");
  pars.push_back(FsPar("validatedoutput",-1,"Derived. Whether we have validated output from each stage of the analysis (0-4)",1));
  parsize.push_back(pars.size());

  sectionnames.push_back("1");
  sectioncomments.push_back("Universal Stage1-4 properties of the analysis");
  pars.push_back(FsPar("exec",-1,"Finestructure command line. Set this to be able to use a specific version of this software. (default: fs)"));
  pars.push_back(FsPar("hpc",-1,"THIS IS IMPORTANT FOR BIG DATASETS! Set hpc mode. 0: Commands are run 'inline' (see 'numthreads' to control how many CPU's to use). 1: Stop computation for an external batch process, creating a file containing commands to generate the results of each stage. 2: Run commands inline, but create the commands for reference. (default: 0.)"));
  pars.push_back(FsPar("numthreads",-1,"Maximum parallel threads in 'hpc=0' mode. Default: 0, meaning all available CPUs."));
  pars.push_back(FsPar("ploidy",-1,"Haplotypes per individual. =1 if haploid, 2 if diploid. (default: 2)"));
  pars.push_back(FsPar("linkagemode",-1,"unlinked/linked. Whether we use the linked model. default: unlinked / linked if recombination files provided."));
  pars.push_back(FsPar("indsperproc",-1,"Desired number of individuals per process (default: 0, meaning autocalculate: use 1 In HPC mode, ceiling(N/numthreads) otherwise. Try to choose it such that you get a sensible number of commands compared to the number of cores you have available."));
  pars.push_back(FsPar("outputlogfiles",-1,"1=Commands are written to file with redirection to log files. 0: no redirection. (default:1)"));
  pars.push_back(FsPar("allowdep",-1,"Whether dependency resolution is allowed. 0=no, 1=yes. Main use is for pipelining. (default:1)."));
  parsize.push_back(pars.size());

  /// Chromopainter Stage1-2 generic properties
  sectionnames.push_back("1");
  sectioncomments.push_back("ChromoPainter Stage1-2 generic properties");
  pars.push_back(FsPar("s12inputtype",1,"What type of data input (currently only \"phase\" supported)"));
  pars.push_back(FsPar("idfile",1,"IDfile location, containing the labels of each individual. REQUIRED, no default (unless -createids is used)."));
  pars.push_back(FsPar("s12args",1,"arguments to be passed to Chromopainter (default: empty)"));
  parsize.push_back(pars.size());

  //Quantities observed from data
  sectionnames.push_back("1");
  sectioncomments.push_back("Quantities observed from data");
  pars.push_back(FsPar("ninds",-1,"Derived. number of individuals observed in the idfile",1));
  pars.push_back(FsPar("nindsUsed",-1,"Derived. number of individuals retained for processing from the idfile",1));
  pars.push_back(FsPar("nsnps",-1,"Derived. number of SNPs in total, over all files",1));
  parsize.push_back(pars.size());

  //chromopainter stage1 (EM)
  sectionnames.push_back("1");
  sectioncomments.push_back("ChromoPainter Stage1 (EM) properties");
  pars.push_back(FsPar("s1args",1,"Arguments passed to stage1 (default:-in -iM --emfilesonly)"));
  pars.push_back(FsPar("s1emits",1,"Number of EM iterations (chromopainter -i <n>, default: 10)"));
  pars.push_back(FsPar("s1minsnps",1,"Minimum number of SNPs for EM estimation (for chromopainter -e, default: 10000)"));
  pars.push_back(FsPar("s1snpfrac",1,"fraction of genome to use for EM estimation. (default: 0.1)"));
  pars.push_back(FsPar("s1indfrac",1,"fraction of individuals to use for EM estimation. (default: 1.0)"));
  pars.push_back(FsPar("s1outputroot",1,"output file for stage 1 (default is autoconstructed from filename)"));
  parsize.push_back(pars.size());

  //chromopainter inferred properties for stage2 from stage1
  sectionnames.push_back("1");
  sectioncomments.push_back("ChromoPainter Stage2 properties inferred from Stage1");
  pars.push_back(FsPar("Neinf",-1,"Derived. Inferred `Effective population size Ne' (chromopainter -n).",1));
  pars.push_back(FsPar("muinf",-1,"Derived. Inferred Mutation rate mu (chromopainter -M)",1));
  parsize.push_back(pars.size());
  
  //chromopainter stage2 (painting)
  sectionnames.push_back("2");
  sectioncomments.push_back("ChromoPainter Stage2 (main run) properties");
  pars.push_back(FsPar("s2chunksperregion",2,"number of chunks in a \"region\" (-ve: use default of 100 for linked, nsnps/100 for unlinked)"));
  pars.push_back(FsPar("s2samples",2,"number of samples of the painting to obtain per recipient haplotype, for examining the details of the painting. (Populates <root>.samples.out; default 0. Warning: these file can get large)"));
  pars.push_back(FsPar("s2args",2,"Additional arguments for stage 2 (default: none, \"\")"));
  pars.push_back(FsPar("s2outputroot",2,"Output file name for stage 2 (default: autoconstructed)."));
  pars.push_back(FsPar("s2combineargs",2,"Additional arguments for stage 2 combine (fs combine; default: none, \"\")"));
  parsize.push_back(pars.size());

  // chromocombine inferred properties for stage3 from stage2
  sectionnames.push_back("2");
  sectioncomments.push_back("FineSTRUCTURE Stage3 properties inferred from Stage2");
  pars.push_back(FsPar("cval",-1,"Derived. 'c' as inferred using chromopainter. This is only used for sanity checking. See s34 args for setting it manually.",1));

  pars.push_back(FsPar("cproot",3,"The name of the final chromopainter output. (Default: <filename>, the project file name)"));
  pars.push_back(FsPar("cpchunkcounts",3,"the finestructure input file, derived name of the chunkcounts file from cproot.",1));
  parsize.push_back(pars.size());

  // finestructure stage3-4 generic properties
  sectionnames.push_back("3");
  sectioncomments.push_back("FineSTRUCTURE Stage3-4 generic properties");
  pars.push_back(FsPar("fsroot",3,"The name of the finestructure output (Default: <filename>, the project file name)."));
  pars.push_back(FsPar("s34args",3,"Additional arguments to both finestructure mcmc and tree steps. Add \"-c <val>\" to manually override 'c'."));
  parsize.push_back(pars.size());

  // finestructure stage3 MCMC inference
  sectionnames.push_back("3");
  sectioncomments.push_back("FineSTRUCTURE Stage3 MCMC inference");
  pars.push_back(FsPar("s3iters",3,"Number of TOTAL iterations to use for MCMC. By default we assign half to burnin and half to sampling. (default: 100000)"));
  pars.push_back(FsPar("s3iterssample",3,"Number of iterations to use for MCMC (default: -ve, meaning derive from s3iters)"));
  pars.push_back(FsPar("s3itersburnin",3,"Number of iterations to use for MCMC burnin (default: -ve, meaning derive from s3iters)"));
  pars.push_back(FsPar("numskip",3,"Number of mcmc iterations per retained sample; (default: -ve, meaning derive from maxretained)"));
  pars.push_back(FsPar("maxretained",3,"Maximum number of samples to retain when numskip -ve. (default: 500)"));
  pars.push_back(FsPar("nummcmcruns",3,"Number of *independent* mcmc runs. (default: 2)"));
  pars.push_back(FsPar("fsmcmcoutput",3,"Filename to use for mcmc output (default: autogenerated)"));
  pars.push_back(FsPar("mcmcGR",3,"Derived. Gelman-Rubin diagnostics obtained from combining MCMC runs, for log-posterior, K,log-beta,delta,f respectively"));
  // Help text corrected to name the real parameter ("threshGR").
  pars.push_back(FsPar("threshGR",3,"Threshold for the Gelman-Rubin statistic to allow moving on to the tree building stage. We always move on if threshGR<0. (Default: 1.3)"));
  parsize.push_back(pars.size());

  //stage 4 finestructure tree inference
  sectionnames.push_back("4");
  sectioncomments.push_back("FineSTRUCTURE Stage4 tree inference");
  pars.push_back(FsPar("s4args",4,"Extra arguments to the tree building step. (default: none, \"\")"));
  pars.push_back(FsPar("s4iters",4,"Number of maximization steps when finding the best state from which the tree is built. (default: 100000)"));
  pars.push_back(FsPar("fstreeoutput",4,"Filename to use for finestructure tree output. (default: autogenerated)"));
  parsize.push_back(pars.size());

  // Vector quantities
  sectionnames.push_back("1");
  sectioncomments.push_back("Vector quantities placed at the end for readability");
  pars.push_back(FsPar("phasefiles",1,"Comma or space separated list of all 'phase' files containing the (phased) SNP details for each haplotype. Required. Must be sorted alphanumerically to ensure chromosomes are correctly ordered. So don't use *.phase, use file{1..22}.phase. Override this with upper case -PHASEFILES."));
  pars.push_back(FsPar("recombfiles",1,"Comma or space separated list of all recombination map files containing the recombination distance between SNPs. If provided, a linked analysis is performed. Otherwise an 'unlinked' analysis is performed. Note that linkage is very important for dense markers!"));
  pars.push_back(FsPar("nsnpsvec",-1,"Derived. Comma separated list of the number of SNPs in each phase file.",1));
  pars.push_back(FsPar("s1outputrootvec",-1,"Derived. Comma separated list of the stage 1 output files names.",1));

  parsize.push_back(pars.size());

  sectionnames.push_back("2");
  sectioncomments.push_back("");
  pars.push_back(FsPar("s2outputrootvec",-1,"Derived. Comma separated list of the stage 2 output files names.",1));
  parsize.push_back(pars.size());

  sectionnames.push_back("3");
  sectioncomments.push_back("");
  pars.push_back(FsPar("fsmcmcoutputvec",-1,"Derived. Comma separated list of the stage 3 output files names.",1));
  pars.push_back(FsPar("old_fsmcmcoutputvec",-1,"Derived. Comma separated list of the stage 3 output files names, if we need to continue a too-short MCMC run.",1));

  parsize.push_back(pars.size());

  sectionnames.push_back("4");
  sectioncomments.push_back("");
  pars.push_back(FsPar("fstreeoutputvec",-1,"Derived. Comma separated list of the stage 4 output files names.",1));
  parsize.push_back(pars.size());
  // Deliberately outside every section: writeToFile() prints this entry's
  // help text next to the trailing "stage:" line.
  pars.push_back(FsPar("stage",-1,"Derived. Don't mess with this! The internal measure of which stage of processing we've reached. Change it via -reset or -duplicate.",1));
}

// Rebuild the table of command-line actions (cmds) from a static list.
// Each row supplies the four FsCmd constructor arguments in order.
// NOTE(review): the integer looks like the stage the command is associated
// with (-1 for stage-independent commands) - confirm against FsCmd.
void FsProject::defineCommands()
{
  struct CmdRow {
    const char *name;     // command name as typed after '-'
    int         num;      // second FsCmd argument (see note above)
    const char *argspec;  // short description of the expected arguments
    const char *help;     // help text
  };
  static const CmdRow rows[] = {
    {"go",-1,"","Do the next things that are necessary to get a complete set of finestructure runs."},
    {"import",-1,"<file>","Import some settings from an external file. If you need to set any non-trivial settings, this is the way to do it. See \"fs -hh\" for more details."},
    {"createid",1,"<filename>","Create an ID file from a PROVIDED phase file. Individuals are labelled IND1-IND<N>."},
    {"reset",-1,"<stage>","Reset the processing to an earlier point. Further \"-go\" commands will rerun any activity from this point. Helpful for rerunning with different parameters."},
    {"duplicate",-1,"<stage> <newfile.cp>","Copy the information from the current settings file, and then -reset it to <stage>."},
    {"configmcmc",3,"<s3itersburnin> <s3iterssample> <numskip>","Shorthand for setting the parameters of the FineSTRUCTURE MCMC. Takes arguments in the form of finestructure's -x -y -z parameters."},
    {"ignoreGR",3,"","Reset the MCMC files to a previously completed but unconverged run, allowing processing to proceed as though it were converged."},
    {"haploid",1,"","Shorthand for the parameter `ploidy:1'"},
    {"countdata",1,"","Ends stage0. Performs checks on the data and confirms that we have valid data."},
    {"makes1",1,"","Make the stage1 commands."},
    {"makes2",2,"","Make the stage2 commands."},
    {"makes3",3,"","Make the stage3 commands."},
    {"makes4",3,"","Make the stage4 commands."},
    {"dos1",1,"","Do the stage1 commands. This we should only be doing in single machine mode; we use -writes1 in HPC mode."},
    {"dos2",2,"","Do the stage2 commands. This we should only be doing in single machine mode; we use -writes2 in HPC mode."},
    {"dos3",3,"","Do the stage3 commands. This we should only be doing in single machine mode; we use -writes3 in HPC mode."},
    {"dos4",4,"","Do the stage4 commands. This we should only be doing in single machine mode; we use -writes4 in HPC mode."},
    {"writes1",1,"<optional filename>","Write the stage1 commands to file, which we only need in HPC mode. In single machine mode we can instead use -dos1."},
    {"writes2",2,"<optional filename>","Write the stage2 commands to file, which we only need in HPC mode. In single machine mode we can instead use -dos2."},
    {"writes3",3,"<optional filename>","Write the stage3 commands to file, which we only need in HPC mode. In single machine mode we can instead use -dos3."},
    {"writes4",4,"<optional filename>","Write the stage4 commands to file, which we only need in HPC mode. In single machine mode we can instead use -dos4."},
    {"combines1",1,"","Ends stage1 by combining the output of the stage1 commands. This means estimating the parameters mu and Ne from the output of stage1."},
    {"combines2",2,"","Ends stage2 by combining the output of the stage2 commands. This means estimating 'c' and creating the genome-wide chromopainter output for all individuals."},
    {"combines3",2,"","Ends stage3 by checking the output of the stage3 commands."},
    {"combines4",2,"","Ends stage4 by checking the output of the stage4 commands."}
  };
  const unsigned int nrows=sizeof(rows)/sizeof(rows[0]);

  cmds.clear();
  for(unsigned int r=0;r<nrows;++r){
    cmds.push_back(FsCmd(rows[r].name,rows[r].num,rows[r].argspec,rows[r].help));
  }
}

///////////////////////////////
// Append one "CMDLINE: " record to the history text.
// The record is the prefix, then every element of args followed by a single
// space, then a newline.
void FsProject::addHistory(std::vector<std::string> args)
{
  std::string record("CMDLINE: ");
  for(std::vector<std::string>::const_iterator it=args.begin();it!=args.end();++it){
    record+=*it;
    record+=' ';
  }
  record+='\n';
  historytext.append(record);
}

// Redirect stdout to the file named by newStream (opened for writing).
// The current stream position and a duplicate of the current stdout file
// descriptor are remembered in stdout_pos / stdout_fd so that revertStdout()
// can restore them afterwards.
// Throws runtime_error if the descriptor cannot be duplicated or the new
// file cannot be opened.
void FsProject::switchStdout(const char *newStream)
{
  fflush(stdout);
  fgetpos(stdout, &stdout_pos);
  stdout_fd = dup(fileno(stdout));
  // Without a valid duplicate there would be nothing to revert to later, so
  // refuse to redirect rather than lose the original stdout.
  if(stdout_fd<0) throw(runtime_error("Could not switch log file!"));
  FILE * ftest = freopen(newStream, "w", stdout);
  if(ftest==NULL) {
    // Do not leak the saved descriptor when the redirection itself failed.
    close(stdout_fd);
    stdout_fd=-1;
    throw(runtime_error("Could not switch log file!"));
  }
}

// Undo switchStdout(): point stdout back at the descriptor saved in
// stdout_fd and restore the stream position saved in stdout_pos.
// The order of the calls below is significant.
void FsProject::revertStdout()
{
  // Push out anything still buffered for the current target.
  fflush(stdout);
  // Make stdout's descriptor refer to the saved one again ...
  dup2(stdout_fd, fileno(stdout));
  // ... after which the saved duplicate is no longer needed.
  close(stdout_fd);
  // Clear any error/EOF indicator left on the stream.
  clearerr(stdout);
  fsetpos(stdout, &stdout_pos);
}

// True when the project has progressed strictly beyond teststage.
bool FsProject::finishedStage(int teststage)
{
  return(stage>teststage);
}

// Accessor for the hpc mode setting.
int FsProject::getHpc(){
  return hpc;
}

// Forward the requested thread count to OpenMP.
// When compiled without OpenMP this does nothing.
void FsProject::do_omp_set_num_threads(int numthreads){
#ifndef _OPENMP
  (void)numthreads; // nothing to configure in a build without OpenMP
#else
  omp_set_num_threads(numthreads);
#endif
}

// Value of omp_get_thread_num() for the calling thread, or 0 when compiled
// without OpenMP.
int FsProject::get_omp_get_thread_num(){
  int tid=0;
#ifdef _OPENMP
  tid=omp_get_thread_num();
#endif
  return(tid);
}

// Value of omp_get_num_threads() at the call site, or 1 when compiled
// without OpenMP.
int FsProject::get_omp_get_num_threads(){
  int nthreads=1;
#ifdef _OPENMP
  nthreads=omp_get_num_threads();
#endif
  return(nthreads);
}

// Number of individuals to hand to each process.
//   - an explicit positive indsperproc setting wins;
//   - otherwise 1 when hpc is non-zero;
//   - otherwise ceil(nindsUsed / T), where T is numthreads if positive, or
//     else the thread count seen inside an OpenMP parallel region (1 when
//     built without OpenMP).
int FsProject::getIndsPerProc(){
  if(indsperproc>0) return(indsperproc);
  if(hpc) return(1);

  int nworkers=numthreads;
  if(nworkers<=0){
    // *************** FIXME!
    nworkers=1;
    #pragma omp parallel
    {
      // Declared inside the region, so each thread has its own copy.
      const int tid = get_omp_get_thread_num();
      if ( tid == 0 ) {
        nworkers = get_omp_get_num_threads();
      }
    }
  }
  return(ceil(((double)nindsUsed)/nworkers));
}

// Default number of chunks per region for the current linkage mode:
// 100 when linked, max(10, nsnps/100) when unlinked.
// Throws logic_error("s2chunksperregion") when the mode is neither of those,
// or when the mode is unlinked and nsnps is negative.
int FsProject::defaultChunksperregion(){
  if(linkagemode=="linked") return(100);

  const bool unlinked=(linkagemode=="unlinked");
  if(!unlinked || nsnps<0) throw(logic_error("s2chunksperregion"));

  return(max(10,(int)(nsnps/100)));
}

// Command-file name recorded for a stage (1-4).
// A negative argument selects the project's current stage; any other value
// outside 1-4 yields an empty string.
string FsProject::getCommandFile(int stage){
  const int which=(stage<0) ? this->stage : stage;
  if(which==1) return(s1commandfile);
  if(which==2) return(s2commandfile);
  if(which==3) return(s3commandfile);
  if(which==4) return(s4commandfile);
  return(string(""));
}

// Number of commands stored for a stage (1-4).
// A negative argument selects the project's current stage; any other value
// outside 1-4 yields -1.
int FsProject::getCommandFileCount(int stage){
  const int which=(stage<0) ? this->stage : stage;
  if(which==1) return(s1commands.size());
  if(which==2) return(s2commands.size());
  if(which==3) return(s3commands.size());
  if(which==4) return(s4commands.size());
  return(-1);
}

// "linked" when at least one recombination file is registered, otherwise
// "unlinked".
string FsProject::whichLinkagemode()
{
  return(recombfiles.empty() ? "unlinked" : "linked");
}


//////////////////////////////////
// Parse a parameter setting given as raw command-line tokens and apply it.
// Returns whatever applyVal(FsSettingsValue) returns when the tokens parse.
// Throws runtime_error("Invalid parameter setting") when they do not; before
// throwing, the help for the first token is printed if there is one.
bool FsProject::applyVal(std::vector<std::string> args)
{
  FsSettingsValue val(args);
  if(val.success) return(applyVal(val));

  cerr<<"ERROR: Tried to set a parameter, but this failed. Is this a malformed command line?"<<endl;
  // args may be empty, in which case there is no token to look help up for.
  if(!args.empty()) cerr<<"HELP for "<<cmdInfo(args[0],true)<<endl;
  throw(runtime_error("Invalid parameter setting"));
}

// Apply one parsed name/value setting to the project.
//
// The name is matched against the known parameters in stage order:
//   - history lines ("CMD", "CMDLINE") and "stage" are applied and return
//     true immediately;
//   - every other recognised parameter is stored and the function returns
//     the result of checkStage(name, value);
//   - an unknown name prints a warning to stderr and returns false.
//
// Throws runtime_error when the phase or recombination file lists contain
// duplicates, or when phase files given via lower-case "phasefiles" at
// stage 0 are not in alphanumeric order. In those cases the previous file
// list is restored before throwing.
bool FsProject::applyVal(FsSettingsValue val)
{
  string name=val.getName();
  bool found=false;

  //////////// COMMENT 
  if(name.compare("CMD")==0){
    historytext.append(val.getEndedLine()); found=true;
  }else if(name.compare("CMDLINE")==0){
    historytext.append(val.getEndedLine()); found=true;
  }else if(name.compare("stage")==0){
    stage=val.getValAsInt();found=true;
  }

  if(found){
    if(verbose) cout<<"Successfully read "<<val.getName()<<":\""<<val.getVal()<<"\""<<endl;
    return(found);
  }
  //////////// STAGE1

  if(name.compare("exec")==0){
    exec=val.getVal(); found=true;
  }else if(name.compare("verbose")==0){
    verbose=val.getValAsInt(); found=true;
  }else if(name.compare("fsfile")==0){
    fsfile=val.getVal(); found=true;
  }else if(name.compare("allowdep")==0){
    allowdep=val.getValAsInt(); found=true;
  }else if(name.compare("hpc")==0){
    hpc=val.getValAsInt(); found=true;
  }else if (name.compare("ploidy")==0){
    ploidy=val.getValAsInt(); found=true;
  }else if (name.compare("linkagemode")==0){
    linkagemode=val.getVal(); found=true;
  }else if (name.compare("indsperproc")==0) {
    indsperproc=val.getValAsInt(); found=true;
  }else if (name.compare("outputlogfiles")==0) {
    outputlogfiles=val.getValAsInt(); found=true;
  }else if (name.compare("validatedoutput")==0) {
    validatedoutput=val.getValAsIntVec(); found=true;
  }

  // STAGE1-2 generics
  if(name.compare("s12inputtype")==0){
    s12inputtype=val.getVal(); found=true;
  }else if ((name.compare("phasefiles")==0)||(name.compare("PHASEFILES")==0)) {
    vector<string> oldphase=phasefiles;
    phasefiles=val.getValAsStringVec(); found=true;
    s12inputtype=string("phase");
    if(unique(phasefiles).size()!=phasefiles.size()){// duplicates
      cerr<<"ERROR: duplicated phase files provided! You only need to specify each phase file once, and you should not use -phasefiles in future commands."<<endl;
      phasefiles=oldphase;
      throw(runtime_error("duplicate phase files"));
    }
    if(name.compare("PHASEFILES")==0){
      // Upper-case form skips the ordering check; from here on it is treated
      // as the ordinary parameter name.
      name=string("phasefiles");
    }else if(stage==0 && !std::is_sorted(phasefiles.begin(),phasefiles.end(),doj::alphanum_less<std::string>())){
      cerr<<"ERROR: Phase files are not lexicographically sorted, so you probably will get confusing assignments of chromosomes to file indices. Rerun with -PHASEFILES instead of -phasefiles to override this check, or rerun with sorted files (e.g. in bash: -phasefiles file{1..22}.phase)."<<endl;
      phasefiles=oldphase;
      throw(runtime_error("unsorted phase files"));
    }
  }else if (name.compare("recombfiles")==0) {
    vector<string> oldrec=recombfiles;
    recombfiles=val.getValAsStringVec(); found=true;
    if(unique(recombfiles).size()!=recombfiles.size()){// duplicates
      recombfiles=oldrec;
      cerr<<"ERROR: duplicated recomb files provided! You only need to specify each recombination file once, and you should not use --recombfiles in future commands."<<endl;
      throw(runtime_error("duplicate recombination files"));
    }
    // Only switch to the linked model once the list has been accepted, so a
    // rejected list leaves linkagemode untouched.
    if(recombfiles.size()>0) linkagemode="linked";
  }else if (name.compare("idfile")==0) {
    idfile=val.getVal(); found=true;
  }else if (name.compare("s12args")==0) {
    s12args=val.getVal(); found=true;
  }else if (name.compare("ninds")==0) {
    ninds=val.getValAsInt(); found=true;
  }else if (name.compare("nindsUsed")==0) {
    nindsUsed=val.getValAsInt(); found=true;
  }else if (name.compare("nsnps")==0) {
    nsnps=val.getValAsInt(); found=true;
  }else if (name.compare("nsnpsvec")==0) {
    nsnpsvec=val.getValAsIntVec(); found=true;
  }else if (name.compare("numthreads")==0) {
    numthreads=val.getValAsInt(); found=true;
  }
  // STAGE1 properties
  if (name.compare("s1args")==0) {
    s1args=val.getVal(); found=true;
  }else if (name.compare("s1emits")==0) {
    s1emits=val.getValAsInt(); found=true;
  }else if (name.compare("s1outputroot")==0) {
    s1outputroot=val.getVal(); found=true;
  }else if (name.compare("s1outputrootvec")==0) {
    s1outputrootvec=val.getValAsStringVec(); found=true;
  }else if (name.compare("s1minsnps")==0) {
    s1minsnps=val.getValAsInt(); found=true;
  }else if (name.compare("s1snpfrac")==0) {
    s1snpfrac=val.getValAsDouble(); found=true;
  }else if (name.compare("s1indfrac")==0) {
    s1indfrac=val.getValAsDouble(); found=true;
  }

  //////////// STAGE1 POST
  if (name.compare("Neinf")==0) {
    Neinf=val.getValAsDouble(); found=true;
  }else if (name.compare("muinf")==0) {
    muinf=val.getValAsDouble(); found=true;
  }

  if(found){
    return(checkStage(name,val.getVal()));
  }
  //////////// STAGE2 PRE
  if (name.compare("s2chunksperregion")==0) {
    s2chunksperregion=val.getValAsInt(); found=true;
  }else if (name.compare("s2samples")==0) {
    s2samples=val.getValAsInt(); found=true;
  }else if (name.compare("s2args")==0) {
    s2args=val.getVal(); found=true;
  }else if (name.compare("s2outputroot")==0) {
    s2outputroot=val.getVal(); found=true;
  }else if (name.compare("s2outputrootvec")==0) {
    s2outputrootvec=val.getValAsStringVec(); found=true;
  }else if (name.compare("s2combineargs")==0) {
    s2combineargs=val.getVal(); found=true;
  }

  if(found){
    return(checkStage(name,val.getVal()));
  }

  ////////////// STAGE 3
  //stage3-4 generics
  if (name.compare("cval")==0) {
    cval=val.getValAsDouble(); found=true;
  }else if (name.compare("cproot")==0) {
    cproot=val.getVal(); found=true;
    // Derive the chunkcounts file name from cproot unless one is already set.
    if(cpchunkcounts.compare("")==0) {
      cpchunkcounts=cproot;
      cpchunkcounts.append(".chunkcounts.out");
    }
  }else if (name.compare("cpchunkcounts")==0) {
    cpchunkcounts=val.getVal(); found=true;
  }
  //stage 3 parameters
  if (name.compare("s34args")==0) {
    s34args=val.getVal(); found=true;
  }else if (name.compare("fsroot")==0) {
    fsroot=val.getVal(); found=true;
  }
  if (name.compare("s3iters")==0) {
    s3iters=val.getValAsInt(); found=true;
  }else if (name.compare("s3iterssample")==0) {
    s3iterssample=val.getValAsInt(); found=true;
  }else if (name.compare("s3itersburnin")==0) {
    s3itersburnin=val.getValAsInt(); found=true;
  }else if (name.compare("numskip")==0) {
    numskip=val.getValAsInt(); found=true;
  }else if (name.compare("maxretained")==0) {
    maxretained=val.getValAsInt(); found=true;
  }else if (name.compare("nummcmcruns")==0) {
    nummcmcruns=val.getValAsInt(); found=true;
  }else if (name.compare("fsmcmcoutput")==0) {
    fsmcmcoutput=val.getVal(); found=true;
  }else if (name.compare("fsmcmcoutputvec")==0) {
    fsmcmcoutputvec=val.getValAsStringVec(); found=true;
  }else if (name.compare("old_fsmcmcoutputvec")==0) {
    old_fsmcmcoutputvec=val.getValAsStringVec(); found=true;
  }else if (name.compare("mcmcGR")==0) {
    mcmcGR=val.getValAsDoubleVec(); found=true;
  }else if (name.compare("threshGR")==0) {
    threshGR=val.getValAsDouble(); found=true;
  }

  if(found){
    return(checkStage(name,val.getVal()));
  }

  ///////////////// STAGE 4
  if (name.compare("s4iters")==0) {
    s4iters=val.getValAsInt(); found=true;
  }else if (name.compare("s4args")==0) {
    s4args=val.getVal(); found=true;
  }else if (name.compare("fstreeoutput")==0) {
    fstreeoutput=val.getVal(); found=true;
  }else if (name.compare("fstreeoutputvec")==0) {
    fstreeoutputvec=val.getValAsStringVec(); found=true;
  }
  if(found){
    return(checkStage(name,val.getVal()));
  }

  cerr<<"WARNING: Setting "<<val.getName()<<" not found!"<<endl;
  return(found);

}

void FsProject::resetToStage(int newstage)
{
  if(newstage>stage){
    cerr<<"Attempted to reset to stage "<<newstage<<" but we are only in stage "<<stage<<". Can only reset to the current or past stages!"<<endl;
    throw(runtime_error("reset stage error"));
  }
  for(int i=newstage;i<5;i++) validatedoutput[i]=0;
  if(newstage<1){ //reset the  stage0 stuff
    if(verbose) cout<<"Removing phase/recomb input information..."<<endl;
    phasefiles.clear();
    recombfiles.clear();
    nsnpsvec.clear();
    nsnps=0;
    ninds=0;
    nindsUsed=0;
  }
  if(newstage<2){ //reset the stage1 stuff
    if(verbose) cout<<"Removing EM results..."<<endl;
    s1outputrootvec.clear();
    s1outputroot="";
    Neinf=-1; 
    muinf=-1;
  }
  if(newstage<3){
    if(verbose) cout<<"Removing chromopainter main run results..."<<endl;
    s2outputrootvec.clear();
    s2outputroot="";
    s2combineargs="";
    cval=-1;
    cproot="";
    cpchunkcounts="";
  }
  if(newstage<4){
    if(verbose) cout<<"Removing finestructure mcmc run results..."<<endl;
    fsroot="";
    fsmcmcoutput="";
    fsmcmcoutputvec.clear();
    old_fsmcmcoutputvec.clear();
    mcmcGR.clear();
  }
  if(newstage<5){
    if(verbose) cout<<"Removing finestructure tree run results..."<<endl;
    s4args="";
    fstreeoutput="";
    fstreeoutputvec.clear();
  }
  stage=newstage;
}

void FsProject::createDuplicated(int newstage,string newname)
{
  dirname=projectroot(newname);
  newname=projectfull(dirname);
  setFileName(newname);
  dirname=projectroot(newname);
  if(newstage>2){ // keeping the chromopainter results, redoing fs
    ensures2aroot();
  }else{ // redoing at least some chromopainter. Need a new directory
    if(applyFiles("new",string("stop"))!=0) {
      throw(runtime_error("duplicate error"));
    }
  }
}

// Load settings from the project file, applying each entry in file order
// until the reader returns an unsuccessful value.
// Throws logic_error when no file name has been set yet.
void FsProject::readFromFile()
{
  if(filename.empty()) {
    throw(logic_error("fsproject: attempting to read project before file name is provided!"));
  }
  FsSettingsReader infile(filename,verbose);
  FsSettingsValue val;
  for(;;){
    val=infile.getNext();
    if(!val.success) break;
    applyVal(val);
  }
}

// Write the whole project (header comments, history, every parameter with
// its help text, and the current stage) to the project file.
//
// Parameters are emitted section by section using the tables built by
// defineParameters(): parsize[s] is the one-past-the-end index in pars of
// section s. The entry left over after the last section is expected to be
// "stage"; its help text is printed beside the trailing "stage:" line.
//
// Throws logic_error when no file name is set or when a parameter in pars
// has no matching member here, and runtime_error when the file cannot be
// opened for writing.
void FsProject::writeToFile()
{
  if(filename.compare("")==0) {
    throw(logic_error("fsproject: attempting to write project before file name is provided!"));
  }
  filebuf fb;
  try{
    // filebuf::open() signals failure by returning a null pointer rather
    // than by throwing, so convert that into an exception here; otherwise
    // the project would silently not be saved.
    if(fb.open (filename.c_str(),ios::out)==NULL){
      throw(runtime_error("could not open the file for writing"));
    }
  }catch(exception &x){
    cerr<<"Error opening file!"<<endl<<x.what()<<endl; 
    throw(runtime_error("fsproject: cannot write to file!"));
  }
  ostream os (&fb);
  //////////////////////////////////////////////////
  os<<"section:fsproject"<<endl;
  os<<freetextcomments[0];
  os<<"section:history"<<endl;
  os<<historytext<<endl;
  os<<"section:parameters"<<endl;

  int paron=0;
  string lastsection="",thissection;
  for(int secon=0;secon<(int)sectionnames.size();++secon) {
    thissection=sectionnames[secon];
    // Only print a stage banner when the stage label changes.
    if(thissection.compare(lastsection)!=0) {
      os<<endl<<"###################"<<endl<<"stage:"<<thissection<<endl;
      lastsection=thissection;
    }
    os<<"## "<<sectioncomments[secon]<<endl<<endl;
    while(paron<parsize[secon]){
      string tpar=pars[paron].getName();
      os<<tpar<<":";
      if(tpar.compare("exec")==0)os<<exec;
      else if(tpar.compare("verbose")==0)  os<<verbose;
      else if(tpar.compare("allowdep")==0)  os<<allowdep;
      else if(tpar.compare("fsfile")==0)  os<<fsfile;
      else if(tpar.compare("hpc")==0)os<<hpc;
      else if(tpar.compare("numthreads")==0) os<<numthreads;
      else if(tpar.compare("ploidy")==0)  os<<ploidy;
      else if(tpar.compare("linkagemode")==0)  os<<linkagemode;
      else if(tpar.compare("indsperproc")==0)  os<<indsperproc;
      else if(tpar.compare("outputlogfiles")==0)  os<<outputlogfiles;
      else if(tpar.compare("s12inputtype")==0)  os<<s12inputtype;
      else if(tpar.compare("idfile")==0)  os<<idfile;
      else if(tpar.compare("s12args")==0)  os<<s12args;
      else if(tpar.compare("ninds")==0)  os<<ninds;
      else if(tpar.compare("nindsUsed")==0)  os<<nindsUsed;
      else if(tpar.compare("nsnps")==0)  os<<nsnps;
      else if(tpar.compare("s1args")==0)  os<<s1args;
      else if(tpar.compare("s1emits")==0)  os<<s1emits;
      else if(tpar.compare("s1minsnps")==0)  os<<s1minsnps;
      else if(tpar.compare("s1snpfrac")==0)  os<<s1snpfrac;
      else if(tpar.compare("s1indfrac")==0)  os<<s1indfrac;
      else if(tpar.compare("s1outputroot")==0)  os<<s1outputroot;
      else if(tpar.compare("Neinf")==0)  os<<Neinf;
      else if(tpar.compare("muinf")==0)  os<<muinf;
      else if(tpar.compare("s2chunksperregion")==0)  os<<s2chunksperregion;
      else if(tpar.compare("s2samples")==0)  os<<s2samples;
      else if(tpar.compare("s2args")==0)  os<<s2args;
      else if(tpar.compare("s2outputroot")==0)  os<<s2outputroot;
      else if(tpar.compare("s2combineargs")==0)  os<<s2combineargs;
      else if(tpar.compare("cval")==0)  os<<cval;
      else if(tpar.compare("cproot")==0)  os<<cproot;
      else if(tpar.compare("cpchunkcounts")==0)  os<<cpchunkcounts;
      else if(tpar.compare("s34args")==0)  os<<s34args;
      else if(tpar.compare("fsroot")==0)  os<<fsroot;
      else if(tpar.compare("s3iters")==0)  os<<s3iters;
      else if(tpar.compare("s3iterssample")==0)  os<<s3iterssample;
      else if(tpar.compare("s3itersburnin")==0)  os<<s3itersburnin;
      else if(tpar.compare("numskip")==0)  os<<numskip;
      else if(tpar.compare("maxretained")==0)  os<<maxretained;
      else if(tpar.compare("nummcmcruns")==0)  os<<nummcmcruns;
      else if(tpar.compare("fsmcmcoutput")==0)  os<<fsmcmcoutput;
      else if(tpar.compare("mcmcGR")==0)  os<<ssvec(mcmcGR);
      else if(tpar.compare("threshGR")==0)  os<<threshGR;
      else if(tpar.compare("s4iters")==0)  os<<s4iters;
      else if(tpar.compare("s4args")==0)  os<<s4args;
      else if(tpar.compare("fstreeoutput")==0)  os<<fstreeoutput;

      else if(tpar.compare("phasefiles")==0)  os<<ssvec(phasefiles);
      else if(tpar.compare("recombfiles")==0)  os<<ssvec(recombfiles);
      else if(tpar.compare("nsnpsvec")==0)  os<<ssvec(nsnpsvec);
      else if(tpar.compare("s1outputrootvec")==0)  os<<ssvec(s1outputrootvec);
      else if(tpar.compare("s2outputrootvec")==0)  os<<ssvec(s2outputrootvec);
      else if(tpar.compare("fsmcmcoutputvec")==0)  os<<ssvec(fsmcmcoutputvec);
      else if(tpar.compare("old_fsmcmcoutputvec")==0)  os<<ssvec(old_fsmcmcoutputvec);
      else if(tpar.compare("fstreeoutputvec")==0)  os<<ssvec(fstreeoutputvec);
      else if(tpar.compare("validatedoutput")==0)  os<<ssvec(validatedoutput);
      else throw(logic_error("Trying to write a command that doesn't exist!"));
      os<<"  # "<<pars[paron].getHelp()<<endl;
      ++paron;
    }
  }
  os<<endl<<"section:fsprojectend"<<endl;
  // paron now indexes the entry that follows the last section ("stage").
  os<<"stage:"<<stage<<"  # "<<pars[paron].getHelp()<<endl;
  fb.close();
}
  
// Rebuild the list of free-text comments used when writing the settings file.
// Entry 0 is one combined string: constcommentspre, then constname0..constname4
// in order, then constcommentspost. Entries 1-5 are constname0..constname4
// individually. Any previous content of freetextcomments is discarded.
void FsProject::setupSections()
{
  string header(constcommentspre);
  header+=constname0;
  header+=constname1;
  header+=constname2;
  header+=constname3;
  header+=constname4;
  header+=constcommentspost;

  freetextcomments.clear();
  freetextcomments.push_back(header);
  freetextcomments.push_back(constname0);
  freetextcomments.push_back(constname1);
  freetextcomments.push_back(constname2);
  freetextcomments.push_back(constname3);
  freetextcomments.push_back(constname4);
}

// Store the project settings filename and derive fileroot from it.
//   f : path of the project settings file.
// fileroot is f with its final extension removed. Only a '.' that lies in the
// last path component counts as an extension separator, so names such as
// "./proj" or "/data/v1.2/proj" keep their full text as the root instead of
// being cut at a dot belonging to a directory. A name without any extension
// is used unchanged.
void FsProject::setFileName(std::string f)
{
  filename=f;
  const std::string::size_type lastdot=f.find_last_of('.');
  const std::string::size_type lastsep=f.find_last_of('/');
  const bool hasextension=(lastdot!=std::string::npos) &&
    (lastsep==std::string::npos || lastdot>lastsep);
  if(hasextension) fileroot=f.substr(0,lastdot);
  else fileroot=f;
}

// Return a copy of the stored project settings filename.
std::string FsProject::getFileName()
{
  return filename;
}

// Store the project directory name.
//   d : directory name, kept exactly as given.
void FsProject::setDirectoryName(std::string d)
{
  dirname = d;
}

// Return a copy of the stored project directory name.
std::string FsProject::getDirectoryName()
{
  return dirname;
}

// Switch verbose reporting on or off.
//   verbose : new value of the verbose flag (the parameter shadows the
//             member, hence the explicit this->).
void FsProject::setVerbose(bool verbose)
{
  this->verbose = verbose;
}

void FsProject::countData(){
  // Counts the data and checks that we have sane 
  ninds=getNindsFromFile(true);
  int nindsUnique=getUniqueNindsFromFile(true);
  if(ninds!=nindsUnique) {
  cerr<<"ERROR: ID file "<<idfile<<" appears to contain multiple IDs of the same name. Recreate this file without duplicates, or test with -createid <filename> to create a valid (uninformative) ID file."<<endl;
      throw(runtime_error("data error"));
  } 
  nindsUsed=getNindsFromFile(false);
  for(unsigned int c1=0;c1<phasefiles.size();c1++) {
    if(ninds==0 || getNhapsFromFile(c1)==0){
      cerr<<"ERROR: Failed to process a file. Found "<<ninds<<" individuals in the ID file called "<<idfile<<", and in phase file called "<<phasefiles[c1]<<" with "<<getNhapsFromFile(c1)<<" inds. This is a data problem."<<endl;
      throw(runtime_error("data error"));
    }
    int thaps=getNhapsFromFile(c1);
    if(thaps!=ninds*ploidy){
      cerr<<"ERROR: Mismatch between number of individuals in the ID file called "<<idfile<<" with "<<ninds<<" inds, and in phase file called "<<phasefiles[c1]<<" with "<<getNhapsFromFile(c1)<<" haplotypes. This can be a data or a ploidy problem."<<endl;
      double tploidyrem=thaps % ninds;
      double tploidy=thaps / ninds;
      if(tploidyrem==0) {
         cerr<<"INFORMATION: You appear to have "<<tploidy<<" haplotype";
	 if(tploidy>1)cerr<<"s";
	 cerr<<" per individual";
	 if(tploidy==1) cerr<<", i.e. haploid data";
	 cerr<<". Suggest rerunning with \"-ploidy "<<tploidy<<"\"";
	 if(tploidy==2) cerr<<" (or simply omitting the ploidy flag)";
	 cerr<<"."<<endl;
      }
      throw(runtime_error("data error"));
    }
  }
  nsnpsvec.clear();
  nsnps=0;
  for(unsigned int c1=0;c1<phasefiles.size();c1++){
    int tnsnps=getNsnpsFromFile(c1);
    nsnpsvec.push_back(tnsnps);
    nsnps+=tnsnps;
  }
  if(verbose){
    cout<<"Counted "<<ninds<<" Individuals making up "<<ninds*ploidy<<" haplotypes, using "<<nindsUsed<<" of them. Using "<<nsnps<<" total SNPs over "<<phasefiles.size()<<" phase files."<<endl;
  }
  stage=1;
  validatedoutput[0]=1;
}

// Copy the file `from` to the file `to`, byte for byte.
//   from : path of the file to read.
//   to   : path of the file to create or overwrite.
// Failures are reported on cerr rather than thrown, so the caller's control
// flow is unchanged. The destination is only opened (and therefore only
// truncated) once the source has been opened successfully.
void FsProject::copyfile(std::string from, std::string to)
{
  std::ifstream src(from.c_str(), std::ios::binary);
  if(!src.is_open()){
    cerr<<"WARNING: could not open "<<from<<" for reading; "<<to<<" was not written."<<endl;
    return;
  }
  std::ofstream dst(to.c_str(), std::ios::binary);
  if(!dst.is_open()){
    cerr<<"WARNING: could not open "<<to<<" for writing; "<<from<<" was not copied."<<endl;
    return;
  }
  // Inserting a stream buffer that yields no characters sets failbit, so an
  // empty source is handled by simply leaving the (already empty) copy alone.
  if(src.peek()!=std::ifstream::traits_type::eof()) dst << src.rdbuf();
  dst.flush();
  if(!dst){
    cerr<<"WARNING: failed while copying "<<from<<" to "<<to<<"; the copy may be incomplete."<<endl;
  }
}

// Write the project file, first backing up any existing file of that name.
//
// When `filename` already exists it is copied to
// <dirname>/cpbackup/<filename><i>.bak, where i is the smallest number from 1
// upwards for which no such backup exists yet. writeToFile() is then called
// in every case.
//
// Throws logic_error if more than 10000 backup names are already taken.
void FsProject::safeCreateFile()
{
  if( access( filename.c_str(), F_OK ) != -1 ) { // file exists
    if(verbose) cout<<"Project filename: "<<filename<<" already exists; backing it up..."<<endl;

    // The backup directory does not depend on the backup number, so it is
    // created once rather than on every candidate name.
    std::string backupdir(dirname);
    backupdir.append("/cpbackup/");
    ensureDirectory(backupdir);

    std::string backupname;
    int i=0;
    do{
      i++; // first backup is called <x>1.bak
      if(i>10000) {
	throw(logic_error("FsProject::safeCreateFile concern: found > 10000 backups, can't find a safe name to rename old project file?"));
      }
      std::ostringstream candidate;
      candidate<<backupdir<<filename<<i<<".bak";
      backupname=candidate.str();
    }while(access( backupname.c_str(), F_OK ) != -1 ); // backupfile exists

    if(verbose) cout<<"Backing up "<<filename<<" to "<<backupname<<endl;
    copyfile(filename,backupname);
  }
  writeToFile();
}

// Write the string `s` to the file `fn`, replacing any previous content.
//   s  : text to write, emitted verbatim.
//   fn : path of the file to create or overwrite.
// Throws runtime_error("fsproject: cannot write to file!") when the file
// cannot be opened or the write does not complete.
void FsProject::writeStringToFile(std::string s,std::string fn) 
{
  filebuf fb;
  // filebuf::open reports failure by returning a null pointer; it does not
  // throw, so the result has to be tested explicitly.
  if(!fb.open (fn.c_str(),ios::out)){
    cerr<<"Error opening file!"<<endl<<fn<<endl; 
    throw(runtime_error("fsproject: cannot write to file!"));
  }
  ostream os (&fb);
  os<<s;
  os.flush();
  const bool written=os.good();
  const bool closed=(fb.close()!=0);
  if(!written || !closed){
    cerr<<"Error writing file!"<<endl<<fn<<endl; 
    throw(runtime_error("fsproject: cannot write to file!"));
  }
}

// Write one command per line to the file `fn`, replacing previous content.
//   sv       : the lines to write.
//   fn       : path of the file to create or overwrite.
//   logfiles : when it has exactly as many entries as sv, " &> <logfile>" is
//              appended to each line; any other size means no redirection.
//   addfs    : when true, each line is prefixed with `exec` and a space.
// Throws runtime_error("fsproject: cannot write to file!") when the file
// cannot be opened or the write does not complete.
void FsProject::writeStringVectorToFile(std::vector<std::string> sv,std::string fn,std::vector<std::string> logfiles,bool addfs) 
{
  filebuf fb;
  // filebuf::open reports failure by returning a null pointer; it does not
  // throw, so the result has to be tested explicitly.
  if(!fb.open (fn.c_str(),ios::out)){
    cerr<<"Error opening file!"<<endl<<fn<<endl; 
    throw(runtime_error("fsproject: cannot write to file!"));
  }
  ostream os (&fb);
  const bool redirect=(logfiles.size()==sv.size());
  for(unsigned int i=0;i<sv.size();i++){
    if(addfs) os<<exec<<" ";
    os<<sv[i];
    if(redirect) os<<" &> "<<logfiles[i];
    os<<endl;
  }
  const bool written=os.good();
  const bool closed=(fb.close()!=0);
  if(!written || !closed){
    cerr<<"Error writing file!"<<endl<<fn<<endl; 
    throw(runtime_error("fsproject: cannot write to file!"));
  }
}

// Turn a command line held in one string into an argv-style vector.
//   s : the command line; adjacent characters for which BothAreSpaces is true
//       are first merged by std::unique, then the text is split on ' '.
// Returns one malloc'ed, NUL-terminated copy per token. The caller owns the
// buffers and releases them with freeargv().
// Throws runtime_error if a buffer cannot be allocated; buffers allocated so
// far are released first, so nothing leaks.
std::vector<char *> FsProject::converttoargv(std::string s)
{
  s.erase(std::unique(s.begin(), s.end(), BothAreSpaces), s.end());
  std::vector<std::string> tokens=split(s,' ');
  std::vector<char *> ccmd(tokens.size());
  for (unsigned int j = 0; j < tokens.size(); ++j) {
    ccmd[j] = (char *)malloc(tokens[j].size()+1);
    if(!ccmd[j]) {
      for (unsigned int k = 0; k < j; ++k) free(ccmd[k]);
      throw(runtime_error("fsproject: out of memory while building a command line"));
    }
    strcpy(ccmd[j],tokens[j].c_str());
  }
  return(ccmd);
}

// Release every buffer in an argv-style vector with free().
//   ccmd : the pointers to release. The vector is taken by value, so the
//          caller's copy still holds the (now dangling) pointers.
void FsProject::freeargv(std::vector<char *> ccmd)
{
  for (std::vector<char *>::iterator it = ccmd.begin(); it != ccmd.end(); ++it) {
    free(*it);
  }
}

// Run every stored chromopainter command of one stage, in parallel when
// compiled with OpenMP.
//   stage : 1 runs s1commands (logs named from s1outputrootvec), 2 runs
//           s2commands (logs named from s2outputrootvec). The parameter
//           shadows the member of the same name.
//
// Each command has " --noexitonerrors <root>.log" appended, is converted to
// argv and passed to chromopainter(). A run counts as failed when the last
// line of its log differs from cpsuccesstext or the return value is positive.
//
// Throws logic_error for any other stage value, and
// runtime_error("chromopainter") when a stage 2 run failed. Stage 1 failures
// are only reported on cerr.
void FsProject::doCpStage(int stage) {
#ifndef _OPENMP
  printf("WARNING: You have compiled this code without OpenMP support. Parallel processing is not possible without using HPC mode (\"-hpc 1\") and external parallelization. Consider recompiling after reconfiguring openMP.\n");
#endif

  if((stage<1) || (stage>2)){
    throw(logic_error("fsproject: calling an invalid chromopainter stage (should be 1 or 2)!"));
  }
  std::vector<std::string> sv;
  if(stage==1) sv=s1commands;
  if(stage==2) sv=s2commands;

  int cmdon=0;
  if(numthreads>0) {
    do_omp_set_num_threads(numthreads);
  }
  int allok=1;
#pragma omp parallel for
  for(unsigned int i=0;i<sv.size();i++)  {
    string tsv=sv[i];
    string logfile;
    if(stage==1) logfile=s1outputrootvec[i];
    else logfile=s2outputrootvec[i];
    logfile.append(".log");
    tsv.append(" --noexitonerrors ");
    tsv.append(logfile);

    std::vector<char *> argv=converttoargv(tsv);

    // Increment and read the shared counter in one protected step, so the
    // number reported below is this thread's own and is not read while
    // another thread is updating it.
    int mycmd=0;
#pragma omp critical(fsproject_cpstage_counter)
    {
      mycmd=++cmdon;
    }

    // ss is local to this iteration and needs no protection
    std::ostringstream ss;
    ss<<"Running stage "<<stage;
    if(stage==1) ss<<" (chromopainter parameter estimation) ";
    else ss<<" (chromopainter painting) ";
    ss<<"command number (~"<<mycmd<<") of "<<sv.size()<<" (logging to "<<logfile<<")\n";
    cout<<(ParallelStream()<<ss.str()).toString();
    if(verbose) cout<<(ParallelStream()<<"RUNNING S"<<stage<<" CMD:"<<tsv<<"\n").toString();

    int rv=chromopainter(argv.size(),argv.data());
    freeargv(argv);

    // check that it ran correctly
    const string ll=getLastLine(logfile);
    if((ll.compare(cpsuccesstext)!=0)|| (rv>0)) {
      // One thread at a time: keeps each report contiguous and makes the
      // write to the shared flag race-free.
#pragma omp critical(fsproject_cpstage_report)
      {
	cerr<<"ChromoPainter Run "<<i<<" failed! Return value was "<<rv<<endl;
	string el=getLineContaining("Exiting",logfile);
	cerr<<"Error line was \""<<el<<"\""<<endl;
	if(ll.compare("")==0) {
	  cerr<<"Logging failed. This usually means that chromopainter terminated abnormally."<<endl;
	}else{
	  cerr<<"See log file ("<<logfile<<") for more details."<<endl;
	}
	allok=0;
      }
    }// endif
  }// end for
  if(!allok && stage==2) throw(runtime_error("chromopainter"));
}

// Extract one Ne/mu pair from a space-separated line of an EM output file.
//   line      : the line to parse.
//   Nevec_ptr : receives the value in column 1+ploidy (0-based), via atof.
//   muvec_ptr : receives the value in column 2+ploidy (0-based), via atof.
// Throws runtime_error("-combines1 file error") when the line has fewer than
// 3+ploidy columns.
void FsProject::addEMline(string line, vector<double> *Nevec_ptr, vector<double> *muvec_ptr){
  std::vector<std::string> fields=split(line,' ');
  const unsigned int needed=(unsigned int)(3+ploidy);
  if(fields.size()<needed){
    throw(runtime_error("-combines1 file error"));
  }
  const std::string &netext=fields[1+ploidy];
  const std::string &mutext=fields[2+ploidy];
  Nevec_ptr->push_back(atof(netext.c_str()));
  muvec_ptr->push_back(atof(mutext.c_str()));
  if(verbose) cout<<"... Found EM values: Ne="<<netext.c_str()<<" and mu="<<mutext.c_str()<<endl;
}

// Collect Ne/mu values from one EM output file.
//   filename  : file to read (shadows the member of the same name).
//   Nevec_ptr, muvec_ptr : passed on to addEMline, which appends to them.
// The file is scanned line by line. Whenever a line starting with "EMPAR" is
// met, the non-empty line just before it is handed to addEMline; after the
// last line has been read, that last line is handed over as well if it is not
// empty. A file that cannot be opened or is empty contributes nothing.
// Any exception from addEMline propagates to the caller.
void FsProject::addEMobs(string filename, vector<double> *Nevec_ptr, vector<double> *muvec_ptr){
  ifstream emfile(filename.c_str());
  string previous;
  string current;
  while(getline(emfile, current)){
    const bool startsblock=(current.compare(0,5,"EMPAR")==0);
    if(startsblock && !previous.empty()) addEMline(previous,Nevec_ptr,muvec_ptr);
    previous=current;
  }
  if(!previous.empty()) {
    addEMline(previous,Nevec_ptr,muvec_ptr);
  }
  emfile.close();
}

// Combine the stage1 EM results into single Ne and mu estimates.
//
// For every root in s1outputrootvec the file <root>.EMprobs.out is read with
// addEMobs. Neinf and muinf are set to the plain averages of all collected
// values; the project then moves to stage 2 and validatedoutput[1] is set.
//
// Throws runtime_error when:
//  - no stage1 output roots are defined,
//  - an expected .EMprobs.out file does not exist,
//  - every file failed to parse, or no value at all was collected (this
//    second check stops the averages below from dividing by zero).
// Files that fail with "-combines1 file error" are counted, warned about and
// otherwise skipped.
void FsProject::combineStage1()
{ // extract all EMPARAMETERS from stage1 files
  vector<double> Nevec;
  vector<double> muvec;
  vector<string> stage1emfiles;
  // sanity checks
  if(s1outputroot.size()<1 || s1outputrootvec.empty()){ // invalid; we need output files to combine
    cerr<<"Combining stage1 files for parameter inference requires stage1 to have been defined and run."<<endl;
    throw(runtime_error("fsproject: combines1 stage1 files undefined"));
  }
  int nerrs=0;
  for(unsigned int c1=0;c1<s1outputrootvec.size();c1++){
    string tfile=s1outputrootvec[c1];
    tfile.append(".EMprobs.out");
    stage1emfiles.push_back(tfile);
    if(verbose) cout<<"Adding EM results for file "<<tfile<<endl;
    if( access( tfile.c_str(), F_OK ) == -1 ) { // file does not exist
      cerr<<"Combining stage1 files for parameter inference requires stage1 to have been run. Has stage1 been run? Have the  \"*.EMprobs.out\" files been recovered from remote processing?"<<endl;
      throw(runtime_error("fsproject: combines1 stage1 missing results"));
    }
    try{
      addEMobs(tfile,&Nevec,&muvec);
    }catch(runtime_error& e) {
      string swhat=e.what();
      if(swhat.compare("-combines1 file error")==0){
	++nerrs;
	cerr<<"WARNING: EMfile "<<tfile<<" could not be processed. This could mean that the parameters converged to an invalid point, or that the computation was not completed."<<endl;
      }
    }
  }

  // Check for success
  if(nerrs>=(int)s1outputrootvec.size()){
    cerr<<"ERROR: No EM runs contain valid parameter inference. Something went wrong."<<endl;
    cerr<<"If ChromoPainter stage1 did not complete, try \"-reset 1 -go\" to rerun from stage1."<<endl;
    throw(runtime_error("EM estimation failed"));
  }else if(nerrs>0){
    cerr<<"WARNING: "<<nerrs<<" EM runs (of "<<s1outputrootvec.size()<<") failed. Continuing by disregarding those estimates. This may lead to problems."<<endl;
  }
  // Files can exist and parse without error yet hold no values (for example
  // when they are empty); averaging nothing would give NaN estimates.
  if(Nevec.empty() || muvec.empty()){
    cerr<<"ERROR: No EM parameter estimates were found in the stage1 output. Something went wrong."<<endl;
    cerr<<"If ChromoPainter stage1 did not complete, try \"-reset 1 -go\" to rerun from stage1."<<endl;
    throw(runtime_error("EM estimation failed"));
  }

  // combine
  if(verbose) cout<<"Combining "<<Nevec.size()<<" Ne values from "<<stage1emfiles.size()<<" files"<<endl;
  Neinf=0;
  muinf=0;
  for(unsigned int c1=0;c1<Nevec.size();c1++)Neinf+=Nevec[c1];
  for(unsigned int c1=0;c1<muvec.size();c1++)muinf+=muvec[c1];
  Neinf/=(double)Nevec.size();
  muinf/=(double)muvec.size();
  cout<<"Inferred Ne="<<Neinf<<" and mu="<<muinf<<endl;
  stage=2;
  validatedoutput[1]=1;
}

// Combine the stage2 painting output with chromocombine and validate it.
//
// Builds the command "combine -v <s2combineargs> -o <cproot> <root>..." from
// s2outputrootvec, runs chromocombine() with stdout switched to
// <dirname>/stage2a/<cproot>.stage2combine.log, then reads the first line of
// <cproot>.chunkcounts.out, expecting "#Cfactor <value>", to obtain cval.
// On success the project moves to stage 3 and validatedoutput[2] is set.
//
// Throws runtime_error when no stage2 roots are defined, when a
// <root>.chunkcounts.out file is missing, when chromocombine fails (non-zero
// return, exception, or log not ending in ccsuccesstext), or when cval is
// missing, negative or zero.
void FsProject::combineStage2()
{
  if(s2outputrootvec.empty()){
    throw(runtime_error("fsproject: combines2 stage2 file details not defined, we have not successfully performed -makes2"));
  }

  // Assemble the chromocombine command line, checking each input on the way
  string cmdline("combine -v ");
  cmdline+=s2combineargs;
  cmdline+=" -o ";
  cmdline+=cproot;
  for(size_t idx=0;idx<s2outputrootvec.size();++idx){
    string counts(s2outputrootvec[idx]);
    counts+=".chunkcounts.out";
    if( access( counts.c_str(), F_OK ) == -1 ) { // file does not exist
      cerr<<"Failed to read "<<counts.c_str()<<" in stage -combines2. This is usually because -dos2 has not been performed."<<endl;
      throw(runtime_error("fsproject: combines2 stage2 missing results"));
    }
    cmdline+=" ";
    cmdline+=s2outputrootvec[idx];
  }
  std::vector<char *> argv=converttoargv(cmdline);

  // The log lives in the stage2a directory, which is created on demand
  std::ostringstream logpath;
  logpath<<dirname<<"/stage2a/";
  ensureDirectory(logpath.str());
  logpath<<cproot<<".stage2combine.log";
  string logfile=logpath.str();
  cout<<"Combining stage2 files to file root "<< cproot<<" (logging to "<<logfile<<")"<<endl;
  if(verbose) cout<<"RUNNING STAGE2a CMD:"<<cmdline<<endl;
  switchStdout(logfile.c_str());

  int rv=1; // stays non-zero if chromocombine throws
  try{
    rv=chromocombine(argv.size(),argv.data());
  }catch(exception &x){
    cerr<<"Caught ChromoCombine error: "<<x.what()<<endl;
  }
  freeargv(argv);
  revertStdout();

  // Check that it ran correctly
  const bool logok=(getLastLine(logfile).compare(ccsuccesstext)==0);
  if(!logok || rv!=0){
    cerr<<"ChromoCombine failed! See log file ("<<logfile<<") for details."<<endl;
    cerr<<"If ChromoPainter stage2 did not complete, try \"-reset 2 -go\" to rerun from stage2."<<endl;
    throw(runtime_error("chromocombine"));
  }

  // Read the header of the combined chunkcount file and extract 'c' from it
  cpchunkcounts=cproot;
  cpchunkcounts.append(".chunkcounts.out");
  ifstream countsfile(cpchunkcounts.c_str());
  string header;
  getline(countsfile, header);
  countsfile.close();
  std::vector<std::string> headervec=split(header,' ');
  cval=-1;
  if(headervec.size()==2 && headervec[0].compare("#Cfactor")==0){
    istringstream cstream(headervec[1]);
    cstream >> cval;
  }

  if(cval<0){
    cerr<<"ChromoCombine failed! The combined file "<<cpchunkcounts<<" doesn't contain valid information about 'c'. Something went wrong; See log file ("<<logfile<<") for details."<<endl;
    throw(runtime_error("chromocombine"));
  }
  if(cval==0){
    cerr<<constccerror<<endl;
    throw(runtime_error("chromocombine"));
  }

  cout<<"Successfully run ChromoCombine stage! Inferred a 'c' value of "<<cval<<endl;
  const bool linked=(linkagemode.compare("linked")==0);
  if(linked){
    if(cval>2 || cval<0.1) cerr<<"WARNING: in linked mode, we usually expect 'c' to be between 0.1 and 2. You are advised to examine whether there have been processing problems (for example, has the parameter inference become stuck at parameters implying effectively unlinked data?)"<<endl;
  }else{
    if(cval>0.1) cerr<<"WARNING: in unlinked mode, we usually expect 'c' to be less than 0.1. You are advised to examine whether there have been processing problems."<<endl;
  }
  stage=3;
  validatedoutput[2]=1;
}

// Check the stage3 MCMC output and, when it has converged, accept it.
//
// Requires fsmcmcoutputvec[0] to exist on disk. mcmcConvergence() decides
// whether the runs converged; the largest entry of mcmcGR is reported
// against threshGR (-1 is reported when mcmcGR holds no values).
//  - Not converged: validatedoutput[3] is cleared and the project stays at
//    stage 3.
//  - Converged: fsmcmcoutputvec[0] is renamed to the part of its path after
//    "/stage3/", the project moves to stage 4 and validatedoutput[3] is set.
//    If the path has no "/stage3/" component, or the rename fails, the file
//    and its recorded path are left as they are.
//
// Throws runtime_error when the output is missing or mcmcConvergence throws.
void FsProject::combineStage3()
{ // check MCMC output
  const bool haveoutput = !fsmcmcoutputvec.empty() &&
    ( access( fsmcmcoutputvec[0].c_str(), F_OK ) != -1 );
  if(!haveoutput){ // file does not exist
    cerr<<"If stage3 did not complete, try \"-reset 3 -go\" to rerun from stage3."<<endl;

    throw(runtime_error("fsproject: combines3 stage3 missing results"));
  }

  bool converged=false;
  try{
    converged=mcmcConvergence();
  }catch(exception &x){
    cerr<<"Error opening file!"<<endl<<x.what()<<endl; 
    throw(runtime_error("-combines3: MCMC reading error. Need to rerun MCMC!"));
  }
  // Guard against an empty statistic vector before indexing into it
  double maxgr=-1;
  if(!mcmcGR.empty()){
    maxgr=mcmcGR[0];
    for(unsigned int c1=1;c1<mcmcGR.size();c1++) maxgr=max(maxgr,mcmcGR[c1]);
  }
  if(!converged){
    // Reject the MCMC run; the project stays at stage 3
    validatedoutput[3]=0;
    stage=3;
    cout<<"WARNING: Failed Gelman-Rubin MCMC diagnostic check with maximum potential scale reduction factor "<<maxgr<<" (threshold "<<threshGR<<")"<<endl;
    return;
  }

  if(verbose){
    if(threshGR>0) cout<<"Passed";
    else cout<<"Skipped";
    cout << " Gelman-Rubin MCMC diagnostic check with maximum potential scale reduction factor "<<maxgr<<" (threshold "<<threshGR<<")"<<endl;
  }
  // Move it to the root of the directory
  const string marker("/stage3/");
  const size_t found=fsmcmcoutputvec[0].find(marker);
  if(found==string::npos){
    // Without the marker there is no stage directory to strip from the path
    if(verbose) cout<<"Leaving "<<fsmcmcoutputvec[0]<<" in place (no \"/stage3/\" in its path)"<<endl;
  }else{
    string tstr=fsmcmcoutputvec[0].substr(found+marker.size());
    if(verbose) cout<<"Moving "<<fsmcmcoutputvec[0]<<" to "<<tstr<<endl;
    if(rename(fsmcmcoutputvec[0].c_str(),tstr.c_str())==0){
      fsmcmcoutputvec[0]=tstr;
    }else{
      // Keep the recorded path pointing at the file that actually exists
      cerr<<"WARNING: could not move "<<fsmcmcoutputvec[0]<<" to "<<tstr<<"; continuing with the file in its original location."<<endl;
    }
  }
  stage=4;
  validatedoutput[3]=1;
}

// Check the stage4 tree output and accept it.
//
// When haveOutput(4) holds, fstreeoutputvec[0] is renamed to the part of its
// path after "/stage4/", the project moves to stage 5 and validatedoutput[4]
// is set. If the path has no "/stage4/" component, or the rename fails, the
// file and its recorded path are left as they are.
//
// Throws runtime_error("fsproject: combines4 stage4 missing results") when
// haveOutput(4) is false.
void FsProject::combineStage4()
{ // check TREE output
  if(!haveOutput(4)){ // THIS NEEDS TO BE MORE THOROUGH!
    cerr<<"If stage4 did not complete, try \"-reset 4 -go\" to rerun from stage4."<<endl;
    throw(runtime_error("fsproject: combines4 stage4 missing results"));
  }

  // Move it to the root of the directory
  const string marker("/stage4/");
  const size_t found=fstreeoutputvec[0].find(marker);
  if(found==string::npos){
    // Without the marker there is no stage directory to strip from the path
    if(verbose) cout<<"Leaving "<<fstreeoutputvec[0]<<" in place (no \"/stage4/\" in its path)"<<endl;
  }else{
    string tstr=fstreeoutputvec[0].substr(found+marker.size());
    if(verbose) cout<<"Moving "<<fstreeoutputvec[0]<<" to "<<tstr<<endl;
    if(rename(fstreeoutputvec[0].c_str(),tstr.c_str())==0){
      fstreeoutputvec[0]=tstr;
    }else{
      // Keep the recorded path pointing at the file that actually exists
      cerr<<"WARNING: could not move "<<fstreeoutputvec[0]<<" to "<<tstr<<"; continuing with the file in its original location."<<endl;
    }
  }

  stage=5;
  validatedoutput[4]=1;
}

// Write the single command that performs all of stage3 to a command file.
//   cmdfile : path of the command file to write.
// The command is "<exec> <filename>" followed by a fixed flag sequence that
// turns hpc mode and dependency resolution off, makes, runs and combines
// stage3, and then turns both back on.
void FsProject::writeHpcStage3(string cmdfile)
{
  ostringstream cmd;
  cmd<<exec<<" "<<filename;
  cmd<<" -hpc 0 -allowdep 0 -makes3 -dos3 -combines3 -allowdep 1 -hpc 1";
  writeStringToFile(cmd.str(),cmdfile);
}

// Create an ID file with one generated name per individual.
//   idfile : path of the ID file to write (shadows the member of that name).
// The number of individuals is the haplotype count of the first phase file
// divided by the ploidy; the names written are IND1, IND2, ...
//
// Throws runtime_error("-createid") when there is no phase file, the ploidy
// is not positive, or the haplotype count is not a multiple of the ploidy.
// Throws runtime_error("-createid: cannot write to file!") when the file
// cannot be opened.
void FsProject::createIdFile(string idfile)
{
  if(phasefiles.size()==0) {
    cerr<<"ERROR: Require phase file to create idfile!"<<endl;
    throw(runtime_error("-createid"));
  }
  if(ploidy<=0) { // also protects the division below
    cerr<<"ERROR: ploidy must be positive to create idfile!"<<endl;
    throw(runtime_error("-createid"));
  }
  ///////////// write the ID file
  const int nhaps=getNhapsFromFile(0);
  const int nindst=nhaps/ploidy;
  // Multiplying back detects a remainder; rounding the quotient of an
  // integer division can never differ from the quotient itself.
  if(nindst*ploidy!=nhaps) {
    cerr<<"ERROR: phase file contains a number of haps that is not a multiple of the ploidy!"<<endl;
    throw(runtime_error("-createid"));
  }

  filebuf fb;
  // filebuf::open reports failure by returning a null pointer; it does not
  // throw, so the result has to be tested explicitly.
  if(!fb.open (idfile.c_str(),ios::out)){
    cerr<<"Error opening file!"<<endl<<idfile<<endl; 
    throw(runtime_error("-createid: cannot write to file!"));
  }
  ostream os (&fb);
  for(int i=0;i<nindst;i++){
    os<<"IND"<<i+1<<endl;
  }
  fb.close();
}

// Recommend a value N derived from the number of commands in the command file.
// Returns the command count capped at 8. In OpenMP builds only, a stage 3
// project with exactly one command returns nummcmcruns instead.
int FsProject::recommendN()
{
  const int ncmds=getCommandFileCount();
#ifdef _OPENMP
  if(stage==3 && ncmds==1) return(nummcmcruns);
#endif
  return(std::min(ncmds,8));
}

int FsProject::recommendM()
{
  // data:
  int ncmds=getCommandFileCount();
  int n=recommendN();

  if(stage==3 && ncmds==1){
#ifdef _OPENMP
    return(nummcmcruns);
#endif
  }
  // Linear growth (c*x) until x0 and then logarithmic (alpha*log(a*x+b), matching the value and gradient at x0.
  double x0=80;// max number of commands for which we parallelize completely
  double c=1.0/8.0;// 1/number of commands per batch
  double alpha=4.0; // exponent in the growth rate of the logarithmic section of the curve

  if(ncmds<=x0) return(n);
  double a=exp(c*x0/alpha)*c/alpha;
  double b=exp(c*x0/alpha)-a*x0;
  cout<<"DEBUG "<<a<<" " <<b<<" "<<alpha*log(a*ncmds+b)<<endl;
  return(ceil(alpha*log(a*ncmds+b)));
}

// Report whether the output files of a stage are present.
//   stage       : stage to check (shadows the member of the same name).
//   forcombined : only consulted for stage 2; selects the combined file
//                 instead of the per-run files.
// Checks performed:
//   stage 1            : every <root>.EMprobs.out from s1outputrootvec exists.
//   stage 2, per-run   : every <root>.chunkcounts.out from s2outputrootvec exists.
//   stage 2, combined  : the file named by cpchunkcounts exists.
//   stage 3            : every file in fsmcmcoutputvec exists and its last
//                        line is "</outputFile>".
//   stage 4            : the same test applied to fstreeoutputvec.
// An empty file list counts as missing output. Any other stage returns true.
bool FsProject::haveOutput(int stage,bool forcombined)
{
  switch(stage){
  case 1:
    if(s1outputrootvec.empty()) return(false);
    for(size_t k=0;k<s1outputrootvec.size();++k){
      string path(s1outputrootvec[k]);
      path+=".EMprobs.out";
      if( access( path.c_str(), F_OK ) == -1 ) return(false); // file doesn't exist
    }
    break;

  case 2:
    if(forcombined){
      string path(cpchunkcounts);
      if( access( path.c_str(), F_OK ) == -1 ) return(false); // file doesn't exist
    }else{
      if(s2outputrootvec.empty()) return(false);
      for(size_t k=0;k<s2outputrootvec.size();++k){
	string path(s2outputrootvec[k]);
	path+=".chunkcounts.out";
	if( access( path.c_str(), F_OK ) == -1 ) return(false); // file doesn't exist
      }
    }
    break;

  case 3: // identical checks whether or not forcombined is set
    if(fsmcmcoutputvec.empty()) return(false);
    for(size_t k=0;k<fsmcmcoutputvec.size();++k){
      string path(fsmcmcoutputvec[k]);
      if( access( path.c_str(), F_OK ) == -1 ) return(false); // file doesn't exist
      if(getLastLine(path).compare(string("</outputFile>"))!=0) {
	if(verbose) cout<<"Do not have complete stage3 output "<<path<<endl;
	return(false);
      }
    }
    break;

  case 4:
    if(fstreeoutputvec.empty()) return(false);
    for(size_t k=0;k<fstreeoutputvec.size();++k){
      string path(fstreeoutputvec[k]);
      if( access( path.c_str(), F_OK ) == -1 ) return(false); // file doesn't exist
      if(getLastLine(path).compare(string("</outputFile>"))!=0) {
	if(verbose) cout<<"Do not have complete stage4 output "<<path<<endl;
	return(false);
      }
    }
    break;

  default:
    break;
  }
  return(true);
}

// Report whether the prerequisites of an action are already satisfied.
//   cmd : the action name including its leading dash, e.g. "-makes1".
// Prerequisites per action:
//   -makes1              : ninds is positive.
//   -dosN (N=1..4)       : the matching sNcommands list is not empty.
//   -combines1/2/3/4     : haveOutput(N) holds.
//   -makes2              : linked mode needs positive muinf and Neinf;
//                          unlinked mode needs positive ninds.
//   -makes3, -hpcs3      : the combined stage2 output exists.
//   -makes4              : validatedoutput[3] is set.
//   -go                  : validatedoutput[4] is set.
// Any other action returns true.
// TO DO : check for data for -makes1 and -makes2
bool FsProject::canDo(string cmd)
{
  if(cmd=="-makes1") return(ninds>0);
  if(cmd=="-dos1") return(!s1commands.empty());
  if(cmd=="-combines1") return(haveOutput(1));
  if(cmd=="-makes2"){
    const bool linked=(linkagemode.compare("linked")==0);
    const bool unlinked=(linkagemode.compare("unlinked")==0);
    if(linked && ((muinf<=0)||(Neinf<=0))) return(false);
    if(unlinked && (ninds<=0)) return(false);
    return(true);
  }
  if(cmd=="-dos2") return(!s2commands.empty());
  if(cmd=="-combines2") return(haveOutput(2));
  if(cmd=="-makes3") return(haveOutput(2,true)); // check whether combine done
  if(cmd=="-dos3") return(!s3commands.empty());
  if(cmd=="-combines3") return(haveOutput(3));
  if(cmd=="-hpcs3") return(haveOutput(2,true)); // -hpcs3 replaces -makes3 and -dos3
  if(cmd=="-makes4") return(validatedoutput[3]!=0);
  if(cmd=="-dos4") return(!s4commands.empty());
  if(cmd=="-combines4") return(haveOutput(4));
  if(cmd=="-go") return(validatedoutput[4]!=0);
  return(true);
}

// Work out which actions must run before the requested one.
//   args : the command and its arguments; only args[0] (the action name with
//          its leading dash) is examined.
// Returns the actions to run first, in order; empty when nothing is needed or
// when args itself is empty.
//
// The dependency chain is linear:
//   countdata->makes1->dos1->combines1->makes2->dos2->combines2->makes3->dos3
//   ->combines3->makes4->dos4->combines4->go
// with these complications:
//  - a combinesN step depends on -writesN when hpc is non-zero and on -dosN
//    when hpc is not 1 (so both are listed for other non-zero values);
//  - -combines3 in hpc mode depends on -hpcs3 in OpenMP builds and on
//    -writes3 otherwise;
//  - -makes2 depends on -combines1 in linked mode and on -countdata in
//    unlinked mode.
// Side effects: ensures2root() is called for -makes2 and ensures1root() for
// -makes1.
std::vector<std::string> FsProject::getDependencies(std::vector<std::string> args)
{
  std::vector<std::string> ret;
  if(args.empty()) return(ret); // nothing requested, nothing required
  const std::string &cmd=args[0];

  if(cmd=="-go"){
    if(!canDo(cmd)) ret.push_back("-combines4");
  }else if(cmd=="-combines4"){
    if(!canDo(cmd)){
      if(hpc) ret.push_back("-writes4");
      if(hpc!=1) ret.push_back("-dos4");
    }
  }else if((cmd=="-dos4") || (cmd=="-writes4")){
    if(!canDo("-dos4")) ret.push_back("-makes4");
  }else if(cmd=="-makes4"){
    if(!canDo(cmd)) ret.push_back("-combines3");
  }else if(cmd=="-combines3"){
    if(!canDo(cmd)) {
      int usehpcs3=0;
#ifdef _OPENMP
      usehpcs3=1;
#endif
      if(hpc && !usehpcs3) {ret.push_back("-writes3");
      }else if(hpc && usehpcs3) {ret.push_back("-hpcs3");
      }else ret.push_back("-dos3"); //!hpc mode
    }
  }else if((cmd=="-dos3") || (cmd=="-writes3")){
    if(!canDo("-dos3")) ret.push_back("-makes3");
  }else if(cmd=="-makes3"){
    if(!canDo(cmd)) ret.push_back("-combines2");
  }else if(cmd=="-hpcs3"){
    if(!canDo(cmd)) ret.push_back("-combines2");
  }else if(cmd=="-combines2"){
    if(!canDo(cmd)) {
      if(hpc) ret.push_back("-writes2");
      if(hpc!=1)  ret.push_back("-dos2");
    }
  }else if((cmd=="-dos2") || (cmd=="-writes2")){
    if(!canDo("-dos2")) ret.push_back("-makes2");
  }else if(cmd=="-makes2"){
    ensures2root();
    if(whichLinkagemode().compare("linked")==0){
      if(!canDo(cmd)) ret.push_back("-combines1");
    }
    if(whichLinkagemode().compare("unlinked")==0){
      if(!canDo(cmd)) ret.push_back("-countdata");
    }
  }else if(cmd=="-combines1"){
    if(!canDo(cmd)) {
      if(hpc) ret.push_back("-writes1");
      if(hpc!=1)  ret.push_back("-dos1");
    }
  }else if((cmd=="-dos1") || (cmd=="-writes1")){
    if(!canDo("-dos1")) ret.push_back("-makes1");
  }else if(cmd=="-makes1"){
    if(!canDo(cmd)) ret.push_back("-countdata");
    ensures1root();
  }

  return(ret);
}

// Look up a parameter by name.
//   partest : the name to search for, compared exactly with getName().
// Returns the index of the first matching entry of pars, or -1 if none match.
int FsProject::parIndex(std::string partest) {
  int match=-1;
  for(unsigned int idx=0; idx<pars.size() && match<0; ++idx){
    if(partest.compare(pars[idx].getName())==0) match=(int)idx;
  }
  return(match);
}

// Look up an action by name.
//   cmdtest : the name to search for, compared exactly with getName().
// Returns the index of the first matching entry of cmds, or -1 if none match.
int FsProject::cmdIndex(std::string cmdtest) {
  int match=-1;
  for(unsigned int idx=0; idx<cmds.size() && match<0; ++idx){
    if(cmdtest.compare(cmds[idx].getName())==0) match=(int)idx;
  }
  return(match);
}

std::string FsProject::cmdInfo(std::string cmd,bool statetype){
  std::ostringstream ss;
  while(cmd.substr(0,1).compare("-")==0){ // strip leading -
    cmd=cmd.substr(1,cmd.size());
  }
    
  // Parameters
  int parindex=parIndex(cmd);
  if(parindex>=0) {
    if(statetype) ss<<"Parameter ";
    ss<<pars[parindex].getName()<<" : "<<pars[parindex].getHelp();
    return(ss.str());
  }

  // Actions
  int cmdindex=cmdIndex(cmd);
  if(cmdindex>=0) {
    if(statetype) ss<<"Action    ";
    ss<<"-"<<cmds[cmdindex].getName();
    if(cmds[cmdindex].getShortargs().compare("")!=0)ss<<" "<<cmds[cmdindex].getShortargs();
    ss<<" : "<<cmds[cmdindex].getHelp();
    return(ss.str());
  }

  ss<<cmd<<" : "<<string("No help available. Is this a valid action or parameter?");
  return(ss.str());
}

// Check that a command was given an acceptable number of arguments.
//   args    : the command (args[0]) followed by its arguments.
//   minargs : smallest number of arguments allowed.
//   maxargs : largest number allowed; -1 means "exactly minargs" and any
//             other negative value (-2 is shown as "-INF") means no upper
//             limit.
// Returns true when the count is acceptable. Otherwise prints the expected
// range and the help for the command, then throws runtime_error(args[0]).
bool FsProject::checkArgs(std::vector<std::string> args,int minargs,int maxargs)
{
  const int nargs=(int)args.size()-1;
  if(maxargs==-1) maxargs=minargs;
  const bool toofew=(nargs<minargs);
  const bool toomany=(maxargs>=0) && (nargs>maxargs);
  if(toofew || toomany) {
    cerr<<"ERROR: Incorrect number of arguments: "<<args[0]<<" takes "<<minargs;
    if(maxargs== -2) {
      cerr<<"-INF";
    }else if(maxargs>minargs) {
      cerr<<"-"<<maxargs;
    }
    cerr<<" argument(s)."<<endl;
    cerr<<"Help for "<<cmdInfo(args[0])<<endl;
    throw(runtime_error(args[0]));
  }
  return(true);
}

// Test whether a named parameter may be set at the current stage.
//   args : the parameter name.
//   val  : the value being set; used only in the verbose message.
// Returns 0 when the name is not a known parameter or when the stage check
// for it throws, and 1 otherwise. No exception escapes from the stage check.
int FsProject::checkStage(std::string args,std::string val) {
  const int tpar=parIndex(args);
  if(tpar<0) return(0);

  try{
    checkStage(args,pars[tpar].getStage());
  }catch(exception &){
    return(0);
  }
  if(verbose) cout<<"Successfully read "<<args<<":\""<<val<<"\""<<endl;
  return(1);
}

// Enforce that something is only set up to a given processing stage.
//   args     : name of what is being set; used in the error message.
//   maxstage : last stage at which it may be set; a negative value disables
//              the check.
// Throws runtime_error("commands out of order") when the current stage is
// beyond maxstage.
void FsProject::checkStage(std::string args,int maxstage){
  if(maxstage<0 || stage<=maxstage) return; // allowed

  cerr<<"ERROR: Tried to set "<<args<<" when in processing stage "<<stage<<", but this can only be done before stage "<<maxstage<<".  "<<constdupreset<<endl ;
  throw(runtime_error("commands out of order"));
}

void FsProject::checkStage(std::vector<std::string> args,int maxstage){
  checkStage(args[0],maxstage);
}

/// Execute one command ("action") or parameter assignment.
///
/// args[0] is the command name including its leading '-', the remaining
/// elements are its arguments. The sequence is:
///   1. resolve dependencies reported by getDependencies() (recursively via
///      docmd when allowdep is set, otherwise fail);
///   2. dispatch on args[0]; each handler validates its argument count with
///      checkArgs() and, where relevant, the processing stage with checkStage();
///   3. anything not recognised as an action is offered to applyVal() as a
///      parameter assignment;
///   4. the command is appended to the history and the project file rewritten.
///
/// @throws logic_error   if args is empty
/// @throws runtime_error on bad arguments, unmet dependencies, unknown
///         commands (what() == args[0]), or when a "-writep" command requires
///         the user to run something externally
void FsProject::docmd(std::vector<std::string> args)
{
  bool found=false;
  if(args.size()==0) throw(logic_error("fsproject: docmd empty command!"));
  ///////////////////////////////////////
  // resolve dependencies
  std::vector<std::string> prev=getDependencies(args);
  if(prev.size()>0) {
    if(allowdep){
      if(verbose) cout<<"NOTE: Determined that "<<prev[0]<<" needs to be run first; will attempt to perform this and return to this command."<<endl;
      for(unsigned int c1=0;c1<prev.size();c1++){ // do all the dependencies, not just the first one
        std::vector<std::string> thisprev;
        thisprev.push_back(prev[c1]);
        docmd(thisprev);
      }
    }else{
      cerr<<"ERROR: Command "<<args[0]<<" requires that "<<prev[0]<<" has been run, but dependency resolution has been disabled. You need to manually resolve the dependencies."<<endl;
      throw(runtime_error("dependencies unmet"));
    }
  }
 
  ///////////////////////////////////////
  // HELPER FUNCTIONS
  if(args[0].compare("-import")==0){ // import settings from a text file
    found=checkArgs(args,1);
    // Temporarily point the project at the import file; always restore afterwards
    string oldfile=filename;
    filename=args[1];
    try{
      readFromFile();
    }catch(exception &e){
      cerr<<"Error reading settings file!"<<endl<<e.what()<<endl; 
      filename=oldfile;
      throw(runtime_error("Command line failure"));
    }
    filename=oldfile;
  }else if(args[0].compare("-duplicate")==0){ // duplicate a project to a new settings file
    found=checkArgs(args,2);
    int newstage=0;
    try{newstage=stringToInt(args[1]);
    }catch(exception &x){
      cerr<<"-duplicate argument error: first argument should be an integer."<<endl;
      cerr<<"Help for "<<cmdInfo(args[0])<<endl;
      throw(runtime_error("-duplcate arguments error"));
    }
    resetToStage(newstage);
    createDuplicated(newstage,args[2]);
  }else if(args[0].compare("-reset")==0){ // reset the project to an earlier stage
    found=checkArgs(args,1);
    int newstage=0;
    // Refuse unparsable input: otherwise a typo would silently reset to stage 0
    istringstream stageparser( args[1] );
    if(!(stageparser >> newstage)){
      cerr<<"-reset argument error: argument should be an integer."<<endl;
      cerr<<"Help for "<<cmdInfo(args[0])<<endl;
      throw(runtime_error("-reset arguments error"));
    }
    resetToStage(newstage);
  }

  ///////////////////////////////////////
  // DATA IO

  if(args[0].compare("-createid")==0){ // ID FILE (Important)
    found=checkArgs(args,1);
    checkStage(args,1);
    if(verbose) cout<<"CREATING ID FILE: "<<args[1]<<endl;
    cerr<<"WARNING: When creating the ID file, we must have the correct -phasefiles. The ploidy must also be set (diploid by default, -haploid for haploids)."<<endl;
    createIdFile(args[1]);
    idfile=args[1];
  }

  /*if(args[0].compare("-recombfiles")==0){ // RECOMBINATION FILES (Important)
    found=checkArgs(args,1,-2);
    checkStage(args,1);
    vector<string> oldrec=recombfiles;
    for(unsigned int i=1;i<args.size();i++){
      recombfiles.push_back(args[i]);
    }
    if(unique(recombfiles).size()!=recombfiles.size()){// duplicates
      recombfiles=oldrec;
      cerr<<"ERROR: duplicated recomb files provided! You only need to specify each recombination file once, and you should not use --recombfiles in future commands."<<endl;
      throw(runtime_error("duplicate stage files"));
   }

    s12inputtype=string("phase");
    linkagemode=string("linked");
    if(verbose) cout<<"RECOMB FILES: read "<<recombfiles.size()<<" starting with "<<recombfiles[0]<<endl;
  }*/

/*  if(args[0].compare("-donorfile")==0){
    found=checkArgs(args,1,-2);
    checkStage(args,1);
    cerr<<"ERROR: Donor files are currently not supported in fs. Use -idfile only."<<endl;
    throw(runtime_error(args[0]));
        if(args.size()!=2) {
      cerr<<"ERROR: Incorrect number of arguments: -donorfile takes exactly 1 argument"<<endl;
      throw(runtime_error(args[0]));
    }
    s12donorfile=args[1];
    if(verbose) cout<<"DONOR FILE: "<<s12donorfile<<endl;
    found=true;
  }*/

  if(args[0].compare("-datalist")==0){ // DATA LIST (Not implemented)
    found=checkArgs(args,1);
    checkStage(args,1);
    cout<<"NOT IMPLEMENTED: But this should set data from a file"<<endl;
  } 

  //////////////////////////
  if(args[0].compare("-countdata")==0){ /// Count and check the data
    found=checkArgs(args,0);
    checkStage(args,1);
    countData();
  }

  ///////////////////////////////////
  // Making commands
  if(args[0].compare("-makes1")==0){ /// Create STAGE 1 COMMANDS
    found=checkArgs(args,0);
    checkStage(args,1);

    if(recombfiles.size()==0) {
      cerr<<"ERROR: stage1 not required for unlinked model, but no recombination files provided. Did you forget to provide the recombination map?"<<endl;
      throw(runtime_error(args[0]));
    }
    makeStage1();
  }

  if(args[0].compare("-makes2")==0){ /// Create STAGE 2 COMMANDS
    found=checkArgs(args,0);
    checkStage(args,2);
    if(recombfiles.size()>0) linkagemode=string("linked");
    makeStage2();
  }

  ///////////////////////////////////
  // Doing commands
  if(args[0].compare("-go")==0){ // do everything! Resolve by dependencies
    found=checkArgs(args,0);
    // The run count is the length of the output *vectors*; fsmcmcoutput is a
    // single file-root string, so its size() says nothing about the number of runs.
    const bool havefirstrun=(fsmcmcoutputvec.size()>0 && fstreeoutputvec.size()>0);
    const bool havesecondrun=(fsmcmcoutputvec.size()>1 && fstreeoutputvec.size()>1);
    cout<<"Finestructure complete!"<<endl;
    cout<<"Get started by running the GUI with:"<<endl<<"\"finegui -c " <<cpchunkcounts;
    if(havefirstrun) cout<<" -m "<<fsmcmcoutputvec[0]<<" -t "<<fstreeoutputvec[0];
    if(havesecondrun) cout<<" -m2 "<<fsmcmcoutputvec[1]<<" -t2 "<<fstreeoutputvec[1];
    cout<<"\""<<endl;
    cout<<"Then click \"File->Open\", then \"Read Data File\", \"Read Pairwise Coincidence\" and \"Read Tree\". Then you can explore the results."<<endl;
    if(havesecondrun) cout<<"Check convergence results by click \"File->Manage Second Dataset\", then \"Read Data File\", \"Read Pairwise Coincidence\" and \"Read Tree\". Then close the window and \"View->Pairwise Coincidence\", then \"Second view->Enable Alternative Diagonal View\" and \"Second view->Use second dataset\", then finally \"Second view->Pairwise Coincidence\". The top right diagonal shows the second MCMC run."<<endl;
  }
  if(args[0].compare("-dos1")==0){
    found=checkArgs(args,0);
    checkStage(args,1);
    if(verbose) cout<<"RUNNING "<<s1commands.size()<<" stage1 command(s)!"<<endl;
    doCpStage(1);
  }
  if(args[0].compare("-dos2")==0){ 
    found=checkArgs(args,0);
    checkStage(args,2);
    if(s2commands.size()==0) makeStage2();
    if(verbose) cout<<"RUNNING "<<s2commands.size()<<" stage2 command(s)!"<<endl;
    doCpStage(2);
  }
  if(args[0].compare("-combines1")==0){ // EM parameter estimates combination
    found=checkArgs(args,0);
    checkStage(args,1);
    combineStage1();
  }

  if(args[0].compare("-combines2")==0){ // chromopainter main run combination
    found=checkArgs(args,0);
    checkStage(args,2);
    // NOTE(review): checkArgs(args,0) rejects any argument, so the
    // args.size()>1 branch below looks unreachable - confirm intended arity.
    if(args.size()>1) cproot=args[1];
    else ensures2aroot();
    combineStage2();
  }

  ///////////////////////////////////
  // Writing command for hpc commands
  if(args[0].compare("-writes1")==0){
    found=checkArgs(args,0,1);
    checkStage(args,1);
    if(args.size()==2) s1commandfile=args[1];
    else ensureCommandfile(1);
    if(verbose) cout<<"Writing "<<s1commands.size()<<" stage 1 commands to "<<s1commandfile<<endl;
    writeStringVectorToFile(s1commands,s1commandfile,s1logfiles);
  }

  if(args[0].compare("-writes2")==0){
    found=checkArgs(args,0,1);
    checkStage(args,2);
    if(args.size()==2) s2commandfile=args[1];
    else ensureCommandfile(2);
    if(verbose) cout<<"Writing "<<s2commands.size()<<" stage 2 commands to "<<s2commandfile<<endl;
    writeStringVectorToFile(s2commands,s2commandfile,s2logfiles);
  }

  if(args[0].compare("-writes3")==0){
    found=checkArgs(args,0,1);
    checkStage(args,3);
    if(args.size()==2) s3commandfile=args[1];
    else ensureCommandfile(3);
    if(verbose) cout<<"Writing "<<s3commands.size()<<" stage 3 commands to "<<s3commandfile<<endl;
    writeStringVectorToFile(s3commands,s3commandfile,s3logfiles);
  }

  if(args[0].compare("-hpcs3")==0){
    found=checkArgs(args,0,1);
    checkStage(args,3);
    if(args.size()==2) s3commandfile=args[1];
    else ensureCommandfile(3);
    if(verbose) cout<<"Writing a single, parallel stage 3 command to "<<s3commandfile<<endl;
    writeHpcStage3(s3commandfile);
  }

  if(args[0].compare("-writes4")==0){
    found=checkArgs(args,0,1);
    checkStage(args,4);
    if(args.size()==2) s4commandfile=args[1];
    else ensureCommandfile(4);
    if(verbose) cout<<"Writing "<<s4commands.size()<<" stage 4 commands to "<<s4commandfile<<endl;
    writeStringVectorToFile(s4commands,s4commandfile,s4logfiles);
  }
  

  ///////////////////////////////////
  // Properties of the data
  if(args[0].compare("-haploid")==0){
    found=checkArgs(args,0);
    checkStage(args,1);
    ploidy=1;
    if(verbose) cout<<"Setting Haploid mode"<<endl;
  }

  ///////////////////////////////////
  // FINESTRUCTURE commands
  if(args[0].compare("-configmcmc")==0) { // set mcmc parameters: burnin, sample, skip
    found=checkArgs(args,3);
    checkStage(args,3);
    s3iters=-1; // explicit burnin/sample values override the combined iteration count
    istringstream ( args[1] ) >> s3itersburnin;
    istringstream ( args[2] ) >> s3iterssample;
    istringstream ( args[3] ) >> numskip;
  }
  if(args[0].compare("-ignoreGR")==0) { // accept MCMC results regardless of the GR statistic
    found=checkArgs(args,0);
    checkStage(args,3);
    ignoreGRfsmcmc();
  }
  if(args[0].compare("-makes3")==0) { // finestructure MCMC
    found=checkArgs(args,0,2);
    checkStage(args,3);
    if(args.size()>1) istringstream ( args[1] ) >> numskip;
    if(args.size()>2) istringstream ( args[2] ) >> s3iters;
    makefsmcmc();
  }
  if(args[0].compare("-dos3")==0) { // finestructure MCMC
    found=checkArgs(args,0);
    checkStage(args,3);
    dofsmcmc();
  }
  if(args[0].compare("-combines3")==0) { // finestructure MCMC validation
    found=checkArgs(args,0);
    checkStage(args,3);
    combineStage3();
    if(!validatedoutput[3]){
      if(++fscounter <= fsmaxattempts){
        cout<<"WARNING: Stage 3 convergence criterion failed! Use \"-ignoreGR\" to continue regardless. (Set the parameter \"-threshGR:-1\" to ignore the GR statistic in future.)  Re-running MCMC for longer: this is attempt "<<fscounter<<" of "<<fsmaxattempts<<"."<<endl;
        continuefsmcmc();
      }else{
        cout<<"WARNING: Stage 3 convergence criterion failed! Set -threshGR -1 to ignore the GR statistic. Continuing to tree inference due to exceeding maximum number of attempts."<<endl;
      }
    }
  }
  if(args[0].compare("-makes4")==0) { // finestructure TREE
    found=checkArgs(args,0,1);
    cout<<"Making tree in stage "<<stage<<endl;
    checkStage(args,4);
    if(args.size()==2) istringstream ( args[1] ) >> s4iters;
    makefstree();
  }
  if(args[0].compare("-dos4")==0) { // finestructure TREE
    found=checkArgs(args,0);
    checkStage(args,4);
    dofstree();
  }
  if(args[0].compare("-combines4")==0) { // finestructure TREE validation
    found=checkArgs(args,0);
    checkStage(args,4);
    combineStage4();
  }
  
  ///////////////////////////////////
  // Catch all others

  if(!found){
    // Let any exception from applyVal propagate untouched. Catching it by
    // reference and re-throwing the caught object by value would copy only
    // the std::exception base and lose the derived type and its message.
    found=applyVal(args);
    if(!found){
  //      cerr<<"ERROR: Command or parameter "<<args[0]<<" not recognised"<<endl;
      throw(runtime_error(args[0]));
    }
  }

  
  /// Update the history
  std::ostringstream newhist;
  newhist<<"CMD:"<<args[0];
  for(unsigned int i=1;i<args.size();i++){
    newhist<<" "<<args[i];
  }
  newhist<<endl;
  historytext.append(newhist.str());
  
  // Stop if we need things to be run
  if(args[0].find("-writep")!=string::npos){
    std::ostringstream ss;
    ss<<"-writep missing stage"<<stage;
    throw(runtime_error(ss.str()));
  }

  // Update the project file
  safeCreateFile();
  if(verbose) cout<<"Writing project file..."<<endl;
  writeToFile();

}

/// Sanity-check the ChromoPainter inputs.
/// Phase files are always required; in "linked" mode there must be exactly
/// one recombination file per phase file. The unlinked model needs no
/// recombination data, so nothing further is checked for it.
/// @throws runtime_error when either requirement is not met
void FsProject::matchCpInputs(){
  if(phasefiles.empty()){
    throw(runtime_error("fsproject: No phase files defined!"));
  }

  const bool linked=(linkagemode.compare("linked")==0);
  if(linked && phasefiles.size()!=recombfiles.size()){
    throw(runtime_error("fsproject: Different number of phase to recombination files!"));
  }
}

void FsProject::ensureCommandfile(int forstage)
{
  string ts;
  switch(forstage){
  case 1: ts=s1commandfile;break;
  case 2: ts=s2commandfile;break;
  case 3: ts=s3commandfile;break;
  case 4: ts=s4commandfile;break;
  default: 
    cerr<<"ERROR: ensureCommandfile received an invalid option!"<<endl;
    throw(logic_error("ensureCommandfile"));
  }
  if(ts.compare("")==0){
    ostringstream ss;
    ss<<dirname<<"/commandfiles";
    ensureDirectory(ss.str());
    ss<<"/commandfile"<<forstage<<".txt";
    switch(forstage){
    case 1: s1commandfile=ss.str();break;
    case 2: s2commandfile=ss.str();break;
    case 3: s3commandfile=ss.str();break;
    case 4: s4commandfile=ss.str();break;
    default: 
      throw(logic_error("ensureCommandfile"));
    }
  }
}

void FsProject::ensures1root(){
  if(s1outputroot.compare("")==0){
    ostringstream ss;
    ss<<fileroot<<"_tmp_EM_linked";
    if(ploidy==1)  ss<<"_haploid";
    s1outputroot=string(ss.str());
  }
}

void FsProject::ensures2root(){
  if(s2outputroot.compare("")==0) {
    ostringstream ss;
    ss<<fileroot<<"_tmp_mainrun."<<linkagemode;
    if(ploidy==1)  ss<<"_haploid";
    s2outputroot=string(ss.str());
  }
}

void FsProject::ensures2aroot(){
  if(cproot.compare("")==0){
    ostringstream ss;
    ss<<fileroot<<"_"<<linkagemode;
    if(s2chunksperregion>0)  ss<<"_cpr"<<s2chunksperregion;
    if(ploidy==1)  ss<<"_hap";
    cproot=ss.str();
  }
}

void FsProject::ensurefsroot(){
  if(fsroot.compare("")==0){
    ostringstream ss;
    ss<<fileroot<<"_"<<linkagemode;
    if(s2chunksperregion>0)  ss<<"_cpr"<<s2chunksperregion;
    if(ploidy==1)  ss<<"_hap";
    fsroot=ss.str();
  }
}

void FsProject::ensurefsmcmc(){
  ensurefsroot();
  if(fsmcmcoutput.compare("")==0){
    ostringstream ss;
    ss<<dirname<<"/stage3";
    ensureDirectory(ss.str());
    ss<<"/"<<fsroot<<"_mcmc";
    fsmcmcoutput=ss.str();
  }
  fsmcmcoutputvec.clear();
  for(int c1=0;c1<nummcmcruns;c1++){
    ostringstream ss;
    ss<<fsmcmcoutput;
    if(c1>0) ss<<"_run"<<c1;
    ss<<".xml";
    fsmcmcoutputvec.push_back(ss.str());
  }
}


void FsProject::ensurefstree(){
  ensurefsroot();
  if(fstreeoutput.compare("")==0){
    ostringstream ss;
    ss<<dirname<<"/stage4";
    ensureDirectory(ss.str());
    ss<<"/"<<fsroot<<"_tree";
    fstreeoutput=ss.str();
  }
  fstreeoutputvec.clear();
  for(int c1=0;c1<nummcmcruns;c1++){
    ostringstream ss;
    ss<<fstreeoutput;
    if(c1>0) ss<<"_run"<<c1;
    ss<<".xml";
    fstreeoutputvec.push_back(ss.str());
  }
}

/// Build the output file root for one ChromoPainter command and ensure the
/// stage directory "<dirname>/stage<forstage>/" exists.
/// @param root     base name for the stage
/// @param fileon   zero-based file index (written one-based in the name)
/// @param indstart first individual; 0 together with indend==0 means "all"
/// @param indend   last individual
/// @param forstage stage number used for the directory name
/// @return "<dir><root>_file<N>" followed by "_allinds", "_ind<a>" or "_ind<a>-<b>"
std::string FsProject::makeoutfileroot(string root,int fileon,int indstart,int indend,int forstage){
  std::ostringstream path;
  path<<dirname<<"/"<<"stage"<<forstage<<"/";
  ensureDirectory(path.str());

  path<<root<<"_file"<<(fileon+1);
  const bool allinds=(indstart==0 && indend==0);
  if(allinds){
    path<<"_allinds";
  }else{
    path<<"_ind"<<indstart;
    if(indstart!=indend) path<<"-"<<indend; // a range rather than a single individual
  }
  return(path.str());
}

int FsProject::getNsnpsFromFile(unsigned int fileno){
  if(fileno>=phasefiles.size()) {
    cerr<<"ERROR: Need to have phase file "<< fileno<<" to count the number of SNPs in it!"<<endl;
    throw(logic_error("Asked to count SNPs of a file we don't have"));
  }
  vector<int> cpv=getChromoPainterHeaderInfo(phasefiles[fileno],ploidy);
  return(cpv[1]);
}

/// Read the first header count of a phase file (returned as the number of
/// haplotypes by this function's name).
/// @param which zero-based index into phasefiles
/// @return element [0] of the header info returned by getChromoPainterHeaderInfo()
/// @throws runtime_error("data missing") when which is not a valid index
int FsProject::getNhapsFromFile(int which){

  // The index must lie in [0, phasefiles.size()). The previous test
  // (size < which) let which==size through, and any negative index as well.
  if(which<0 || which>=(int)phasefiles.size()){
    cerr<<"ERROR: Need to have phase file to count the number of individiuals!"<<endl;
    throw(runtime_error("data missing"));
  }
  vector<int> cpv=getChromoPainterHeaderInfo(phasefiles[which],ploidy);
  return(cpv[0]);
}

bool FsProject::readmcmctraces(int filenum){
  string file=fsmcmcoutputvec[filenum];
  FsXml *fs=new FsXml(file);
  streampos fpos=fs->gotoLineContaining("<Iteration>");
  double t_posterior,t_beta,t_delta,t_f,t_k;
  int counts=0;
  
  
  mcmc_posterior.push_back(std::vector<double>());
  mcmc_k.push_back(std::vector<double>());
  mcmc_delta.push_back(std::vector<double>());
  mcmc_beta.push_back(std::vector<double>());
  mcmc_f.push_back(std::vector<double>());

  while(!fs->eof() && fpos>=0) {
    counts++;
    if(fpos>0) {
      string s_pos=fs->getParam("Posterior",fpos);
      string s_K=fs->getParam("K",fpos);
      string s_Beta=fs->getParam("beta",fpos);
      string s_Delta=fs->getParam("delta",fpos);
      string s_F=fs->getParam("F",fpos);
      istringstream ( s_pos ) >> t_posterior;
      istringstream ( s_K ) >> t_k;
      istringstream ( s_Beta ) >> t_beta;
      istringstream ( s_Delta ) >> t_delta;
      istringstream ( s_F ) >> t_f;
      
      mcmc_posterior[filenum].push_back(t_posterior);
      mcmc_k[filenum].push_back(t_k);
      mcmc_beta[filenum].push_back(log(t_beta));
      mcmc_delta[filenum].push_back(t_delta);
      mcmc_f[filenum].push_back(t_f);
      fpos=fs->gotoNextLineContaining("<Iteration>");
    }
  }
  delete(fs);
  if(verbose) cout<<"Read "<<mcmc_posterior[filenum].size()<<" iterations."<<endl;
  return(1);
}

double FsProject::mcmcGRstatistic(std::vector<std::vector<double> > *data){
  double nruns=(double)data->size();
  vector<double> runsums;
  vector<double> runsumsquares;
  double ntot=0;
  vector<double> runn;
  double combinedsum=0;
  double combinedsumsquares=0;
  // construct the sums and sums of squares that are needed
  for(unsigned int c1=0;c1<data->size();++c1){
    runsums.push_back(0);
    runsumsquares.push_back(0);
    runn.push_back(data->at(c1).size());
    ntot+=data->at(c1).size();
    for(unsigned int c2=0;c2<data->at(c1).size();++c2){
      combinedsum+=data->at(c1)[c2];
      runsums[c1]+=data->at(c1)[c2];
      combinedsumsquares+=data->at(c1)[c2]*data->at(c1)[c2];
      runsumsquares[c1]+=data->at(c1)[c2]*data->at(c1)[c2];
    }
  }
  // compute W, the within chain variance
  double totvar= combinedsumsquares/ntot - (combinedsum/ntot)*(combinedsum/ntot);
  totvar*=ntot/(ntot-1);
  vector<double> runvarest;
  double W=0;
  for(unsigned int c1=0;c1<data->size();++c1){
    double tv=(runsumsquares[c1]/runn[c1] - (runsums[c1]/runn[c1])*runsums[c1]/runn[c1]);
    runvarest.push_back(tv * runn[c1]/(runn[c1]-1));
    W += runvarest[c1]/nruns;
  }
  //  cout<<"GR Calc W="<<W <<" totvar="<<totvar<<endl;
  // compute B, the between chain variance
  double B=0;
  for(unsigned int c1=0;c1<data->size();++c1){
    B+= runn[c1]/(nruns-1) *(runsums[c1]/runn[c1] - combinedsum/ntot)*(runsums[c1]/runn[c1] - combinedsum/ntot);
  }
  //  cout<<"GR Calc B="<<B<<endl;
  // Compute the variance estimator for the combined chains
  double nbar=ntot/nruns;
  double sigma_hatsq=(nbar-1)*W/nbar + B/nbar;
  double Rhat = sqrt(sigma_hatsq/W);
  //  cout<<"GR Calc sigmahatsq="<<sigma_hatsq<<" Rhat="<<Rhat<<endl;
  if(sigma_hatsq==0 && W==0) Rhat=1;
  return(Rhat);
}

bool FsProject::mcmcConvergence(){
  if(fsmcmcoutputvec.size()==0){
    cerr<<"WARNING: mcmcConvergence: Haven't yet run the MCMC! Shouldn't be checking it at this stage."<<endl;
    return(1);
  }else if(fsmcmcoutputvec.size()==1){
    cerr<<"WARNING: mcmcConvergence: have only run one MCMC chain. Cannot check convergence."<<endl;
    return(1);
  }

  // Read the posterior traces
  mcmc_posterior.clear();
  mcmc_k.clear();
  mcmc_beta.clear();
  mcmc_delta.clear();
  mcmc_f.clear();

  for(unsigned int i=0;i<fsmcmcoutputvec.size();++i){
    if(verbose) cout<<"Reading MCMC traces for file "<<i+1<<" of "<<fsmcmcoutputvec.size()<<endl;
    if(!readmcmctraces(i)){
      cerr<<"ERROR: Cannot read MCMC traces for file "<<fsmcmcoutputvec[i]<<endl;
    }
  }
  
  stringstream mcmctracesfile;
  mcmctracesfile<<fsmcmcoutput<<".mcmctraces.tab";
  writemcmctraces(mcmctracesfile.str());

  // Compute the Gelman Rubin diagnostics
  mcmcGR.clear();
  mcmcGR.push_back(mcmcGRstatistic(&mcmc_posterior));
  mcmcGR.push_back(mcmcGRstatistic(&mcmc_k));
  mcmcGR.push_back(mcmcGRstatistic(&mcmc_beta));
  mcmcGR.push_back(mcmcGRstatistic(&mcmc_delta));
  mcmcGR.push_back(mcmcGRstatistic(&mcmc_f));
  if(verbose)  {
    cout<<"Gelman-Rubin statistics:";
    cout<<" GR(Log Posterior) = "<<mcmcGR[0];
    cout<<" GR(Number of populations K) = "<<mcmcGR[1];
    cout<<" GR(Log Beta) = "<<mcmcGR[2];
    cout<<" GR(Delta) = "<<mcmcGR[3];
    cout<<" GR(f) = "<<mcmcGR[4]<<endl;
  }
  if(threshGR<0) return(1);
  for(unsigned int i=0;i<mcmcGR.size();++i) { 
    if(mcmcGR[i]>threshGR) return(0);
  }
  return(1);
}

bool FsProject::writemcmctraces(std::string filename){
  ofstream ofile;
  ofile.open (filename.c_str());

  if(mcmc_posterior.size()==0) return(0);
  for(unsigned int c2=0;c2<mcmc_posterior.size();c2++){
    ofile<<"LogPosterior"<<c2<<" K"<<c2<<" beta"<<c2<<" delta"<<c2<<" f"<<c2<<" ";
  }
  ofile<<endl;
  for(unsigned int c1=0;c1<mcmc_posterior[0].size();c1++){
    for(unsigned int c2=0;c2<mcmc_posterior.size();c2++){
      ofile<<mcmc_posterior[c2][c1]<<" ";
      ofile<<mcmc_k[c2][c1]<<" ";
      ofile<<mcmc_beta[c2][c1]<<" ";
      ofile<<mcmc_delta[c2][c1]<<" ";
      ofile<<mcmc_f[c2][c1]<<" ";
    }
    ofile<<endl;
  }
  ofile.close();
  return(1);
}

/// Accept the MCMC results regardless of the Gelman-Rubin statistic.
///
/// When the project is already at stage 3 or later, the iteration settings
/// are halved and the MCMC files previously set aside (old_fsmcmcoutputvec)
/// are renamed back to their original names (fsmcmcoutputvec).
/// In every case the GR threshold is disabled (threshGR=-1) and the list of
/// set-aside files is cleared.
/// @throws runtime_error if a set-aside file is missing or has no matching
///         original name
void FsProject::ignoreGRfsmcmc(){
  if(stage>=3){ // If we are supposed to have output
    cout<<"INFO: Restoring previous MCMC results that failed the GR statistic test."<<endl;
    s3iterssample/=2;
    s3itersburnin=s3iterssample;
    numskip/=2;

    for(unsigned int c1=0;c1<old_fsmcmcoutputvec.size();++c1){
      // Check the set-aside MCMC file itself (not the project settings file),
      // and that there is a destination name to restore it to.
      const bool havetarget=(c1<fsmcmcoutputvec.size());
      if(!havetarget || access( old_fsmcmcoutputvec[c1].c_str(), F_OK ) == -1 ) { // file does not exist
        cerr<<"ERROR: Cannot find original MCMC file. Was the MCMC actually run, and did we reject it on the basis on Gelman-Rubin statistics? If not, try \"-threshGR:-1 -go\" instead of \"-ignoreGR\"."<<endl;
        throw(runtime_error("-ignoreGR cannot reconstruct original MCMC filename"));
      }
      if(verbose) cout<<"Renaming "<<old_fsmcmcoutputvec[c1].c_str()<<" to "<<fsmcmcoutputvec[c1].c_str()<<endl;
      if(rename(old_fsmcmcoutputvec[c1].c_str(),fsmcmcoutputvec[c1].c_str())!=0){
        cerr<<"WARNING: -ignoreGR cannot rename "<<old_fsmcmcoutputvec[c1].c_str()<<" to "<<fsmcmcoutputvec[c1].c_str()<<". Rerunning MCMC instead!"<<endl;
      }
    }
  }else{
      cout<<"INFO: Ignoring the Gelman-Rubin statistic for MCMC."<<endl;
  }

  // Things we do regardless
  threshGR=-1;
  old_fsmcmcoutputvec.clear();
}

/// Re-run the MCMC for longer after a failed convergence check.
///
/// Each current MCMC output file is renamed to
/// "<name>_x<burnin>_y<sample>_z<skip>.xml" and remembered in
/// old_fsmcmcoutputvec. Burn-in is then set to 0, the sample length becomes
/// s3iters if that was set (otherwise it is doubled), and numskip is
/// doubled. Finally -makes3, (-writes3 when hpc is non-zero), -dos3 (unless
/// hpc==1) and -combines3 are issued through docmd().
/// @throws runtime_error if an output name does not contain ".xml" or the
///         file is missing, or (hpc==1) to signal that stage 3 must be run
///         externally
void FsProject::continuefsmcmc(){
  cout<<"INFO: Doubling sample time and continuing from previous run"<<endl;
  old_fsmcmcoutputvec.clear();
  for(unsigned int c1=0;c1<fsmcmcoutputvec.size();++c1){
    size_t found =fsmcmcoutputvec[c1].find(".xml");
    // Check the MCMC output file itself, not the project settings file
    if(found==string::npos || access( fsmcmcoutputvec[c1].c_str(), F_OK ) == -1 ) { // invalid filename, or file does not exist
      cerr<<"ERROR: Cannot reconstruct original MCMC filename from the parameter file. Was the MCMC actually run, and did we reject it on the basis on Gelman-Rubin statistics? If not, try \"-threshGR:-1 -go\" instead of \"-ignoreGR\"."<<endl;
      throw(runtime_error("continuefsmcmc cannot reconstruct original MCMC filename"));
    }
    string tstr=fsmcmcoutputvec[c1].substr(0,found);
    ostringstream ss;
    ss<<tstr<<"_x"<<s3itersburnin<<"_y"<<s3iterssample<<"_z"<<numskip<<".xml";
    old_fsmcmcoutputvec.push_back(ss.str());
    if(rename(fsmcmcoutputvec[c1].c_str(),old_fsmcmcoutputvec[c1].c_str())!=0){
      cerr<<"WARNING: cannot rename "<<fsmcmcoutputvec[c1]<<" to "<<old_fsmcmcoutputvec[c1]<<"."<<endl;
    }
  }
  fsmcmcoutput.clear();
  fsmcmcoutputvec.clear();

  if(s3iters>0){
    s3itersburnin=0;
    s3iterssample=s3iters;
    s3iters=-1;
  }else{
    s3itersburnin=0;
    s3iterssample*=2;
  }
  numskip*=2;

  // Each action is issued as its own command. Putting "-writes3" and "-dos3"
  // in one argument vector would make "-dos3" the filename argument of
  // "-writes3" whenever hpc is neither 0 nor 1.
  vector<string> redocmd;
  redocmd.push_back("-makes3");
  docmd(redocmd);
  if(hpc){
    redocmd.clear();
    redocmd.push_back("-writes3");
    docmd(redocmd);
  }
  if(hpc==1) throw(runtime_error("fsproject: combines3 stage3 missing results"));
  redocmd.clear();
  redocmd.push_back("-dos3");
  docmd(redocmd);
  redocmd.clear();
  redocmd.push_back("-combines3");
  docmd(redocmd);
}

/// Count the entries that getIdsFromFile() returns for the id file.
/// @param keepall forwarded unchanged to getIdsFromFile()
/// @return number of ids returned
/// @throws logic_error("idfile missing") when no id file has been set
int FsProject::getNindsFromFile(bool keepall){
  if(idfile.empty()) {
    cerr<<"ERROR: idfile is required! You can create it with -createid or read an existing one with -idfile ."<<endl;
    throw(logic_error("idfile missing"));
  }
  const std::vector<std::string> ids=getIdsFromFile(idfile,keepall);
  return(static_cast<int>(ids.size()));
}

/// Count the distinct entries among the ids that getIdsFromFile() returns
/// for the id file (duplicates removed with unique()).
/// @param keepall forwarded unchanged to getIdsFromFile()
/// @return number of distinct ids
/// @throws logic_error("idfile missing") when no id file has been set
int FsProject::getUniqueNindsFromFile(bool keepall){
  if(idfile.empty()) {
    cerr<<"ERROR: idfile is required! You can create it with -createid or read an existing one with -idfile ."<<endl;
    throw(logic_error("idfile missing"));
  }
  const std::vector<std::string> distinctids=unique(getIdsFromFile(idfile,keepall));
  return(static_cast<int>(distinctids.size()));
}

void FsProject::makeStage1()
{
  if(linkagemode.compare("linked")!=0) {
    return; ///< Nothing to do 
  }
  if(nsnpsvec.size()!=phasefiles.size()){
    throw(logic_error("-makes1 nsnpsvec problem"));
  }
  matchCpInputs(); ///< checks all is well
  ensures1root();///< construct the output file name root
  s1commands.clear(); ///<
  s1logfiles.clear(); ///<
  
  /// Figure out which individuals go in which files
  int indsmax=nindsUsed; 
  vector<int> indsvec,indsvecall; // a vector of individuals
  int tindsperproc=getIndsPerProc();
  int wantedinds=(int)(indsmax * s1indfrac);
  if(wantedinds>indsmax || wantedinds<=0){
    cerr<<"ERROR: Requested an invalid number ("<<wantedinds<<") of individuals due to -s1indfrac: 0<s1indfrac<=1, with at least one individual."<<endl;
    throw(runtime_error("-makes1 s1indfrac problem"));
  }

  // Check if we need to drop to processing each ind separately, because we only process a subset of individuals
  if(wantedinds<indsmax){ // if we are only processing a subset
    tindsperproc=1;
    for(int indon=1;indon<=indsmax;indon++) indsvecall.push_back(indon);
  }
  
  // Check if we need to drop to processing each ind separately, because we only process a subset of snps
  vector<bool> usesubset;
  vector<int> wantedsnps;
  for(unsigned int fileon=0;fileon<phasefiles.size();fileon++) {
    int tsnps=nsnpsvec[fileon];
    if((s1snpfrac>0)&&(s1minsnps>0)) tsnps=max((int)(nsnpsvec[fileon]*s1snpfrac),s1minsnps);
    wantedsnps.push_back(tsnps);
    if(wantedsnps[fileon]<nsnpsvec[fileon]){ // we are allowed to do less processing
      tindsperproc=1;// must do each ind separately
      usesubset.push_back(true);
    }else usesubset.push_back(false);
  }

 // Construct the commands
  for(unsigned int fileon=0;fileon<phasefiles.size();fileon++) {
    if(indsvecall.size()>0){ // process a different random subset per file
      indsvec=sampleVec(indsvecall,wantedinds);
    }
    /// Loop over individuals
    for(int tindon=1;tindon<=wantedinds;tindon+=tindsperproc){
      int indon=tindon;// tindon is the index of the individual we are processing; indon is the actual individual (in case we sample)
      std::ostringstream ss;
      int indonend=min(indon+tindsperproc-1,indsmax); // make sure we don't process past the end individual
      if(tindsperproc==0){// process all in one go
	indon=indonend=0;
      }else if(indsvecall.size()>0){ // process only a subset
	if(indon-1>=(int)indsvec.size()) throw(logic_error("makes1: indsvec error, reached an individual that shouldn't exist"));
	indon=indonend=indsvec[indon-1];
      }
      ss<<"cp";
      string toutfile=makeoutfileroot(s1outputroot,fileon,indon,indonend,1);
      if(ploidy==1) ss<<" -j";
      ss<<" -i "<<s1emits;
      if(usesubset[fileon]){
	ss<<" -e "<<wantedsnps[fileon];
      }
      if(s1args.compare("")!=0) ss<<" "<<s1args;
      if(s12args.compare("")!=0) ss<<" "<<s12args;
      if(idfile.compare("")!=0) ss<<" -t "<<idfile;
      if(s12donorfile.compare("")!=0) {
	ss<<" -f "<<s12donorfile<<" "<<indon<<" "<<indonend;
      }else{
	ss<<" -a "<<indon<<" "<<indonend;
      }
      ss<<" -g "<<phasefiles[fileon]<<" -r "<<recombfiles[fileon]<<" -o "<<toutfile;      s1outputrootvec.push_back(toutfile);
      s1commands.push_back(ss.str());
      toutfile.append(".log");
      if(outputlogfiles) s1logfiles.push_back(toutfile);

      if(verbose){
	cout<<"CREATING S1 CMD:"<<ss.str()<<endl;
      }
      if(tindsperproc==0) tindon=wantedinds+1; // make sure we end in -a 0 0 mode
    }
  }
}

/// Build the stage 2 (main painting) ChromoPainter commands.
///
/// Rebuilds s2commands, s2logfiles and s2outputrootvec from scratch, one
/// command per phase file and per block of individuals, then sets stage=2.
/// Linked mode passes the recombination file together with the inferred Ne
/// and mu; otherwise "-u" is used. As in makeStage1(), an
/// individuals-per-process value of 0 means "all individuals in one command"
/// and is written as "-a 0 0".
/// @throws runtime_error in linked mode when Ne or mu has not been inferred
void FsProject::makeStage2()
{
  matchCpInputs(); ///< checks all is well
  ensures2root();///< construct the output file name root
  s2commands.clear(); ///<
  s2logfiles.clear(); ///< keep the log list in step with the commands
  s2outputrootvec.clear(); ///< rebuilt below, one entry per command
  
  if(((Neinf<0)| (muinf<0)) && linkagemode.compare("linked")==0){
    cerr<<"Must have inferred Ne and mu when running in linkage mode. Has stage1 been completed? Has -combines1 been run?"<<endl;
    throw(runtime_error("fsproject: makes2 requires combines1"));
  }
  
  /// Figure out which individuals go in which files
  int tindsperproc=getIndsPerProc();

  // Construct the commands
  for(unsigned int fileon=0;fileon<phasefiles.size();fileon++){
    for(int tindon=1;tindon<=nindsUsed;tindon+=tindsperproc){
      std::ostringstream ss;
      int indon=tindon;
      int indonend=min(indon+tindsperproc-1,nindsUsed); // make sure we don't process past the end individual
      if(tindsperproc==0){// process all in one go
        indon=indonend=0;
      }
      ss<<"cp";
      string toutfile=makeoutfileroot(s2outputroot,fileon,indon,indonend,2);
      if(ploidy==1) ss<<" -j";
      if(s2args.compare("")!=0) ss<<" "<<s2args;
      if(s12args.compare("")!=0) ss<<" "<<s12args;
      if(idfile.compare("")!=0) ss<<" -t "<<idfile;
      if(s12donorfile.compare("")!=0) {
        ss<<" -f "<<s12donorfile<<" "<<indon<<" "<<indonend;
      }else{
        ss<<" -a "<<indon<<" "<<indonend;
      }
      if(linkagemode.compare("linked")==0) {
        ss<<" -r "<<recombfiles[fileon]<<" -n "<<Neinf<<" -M "<<muinf;
      }else {
        ss<<" -u";
      }
      if(s2samples>0) ss<<" -s "<<s2samples;
      if(s2chunksperregion>=0) ss<<" -k "<<s2chunksperregion;
      else ss<<" -k "<<defaultChunksperregion();
      ss<<" -g "<<phasefiles[fileon];
      ss<<" -o "<<toutfile;
      s2outputrootvec.push_back(toutfile);
      s2commands.push_back(ss.str());
      toutfile.append(".log");
      if(outputlogfiles) s2logfiles.push_back(toutfile);
      if(verbose){
        cout<<"CREATING S2 CMD:"<<ss.str()<<endl;
      }
      // With a step of 0 the loop would never advance: one command covers everyone
      if(tindsperproc==0) break;
    }
  }
  stage=2;
}


/// Build the stage 3 (finestructure MCMC) commands, one per MCMC run, and
/// set stage=3.
///
/// When s3iters is positive it is split equally between burn-in and
/// sampling. A negative numskip is replaced by
/// max(1, s3iterssample/maxretained). Run i uses seed 2*i+1. When a
/// set-aside output exists for every run (old_fsmcmcoutputvec), it is passed
/// to the command ahead of the new output file.
void FsProject::makefsmcmc(){
  if(s3iters>0){
    s3iterssample=s3iters/2;
    s3itersburnin=s3iters/2;
  }
  if(numskip<0){
    numskip = max(1,(int)floor(s3iterssample/maxretained));
  }
  ensurefsmcmc();
  s3commands.clear();
  // This function is re-run by continuefsmcmc(); without clearing, the log
  // file list would grow on every retry while the command list is rebuilt.
  s3logfiles.clear();
  for(int runon=0;runon<nummcmcruns;runon++){
    std::ostringstream ss;
    ss<<"fs -s "<<runon*2+1;
    if(s34args.compare("")!=0) ss<<" "<<s34args;
    ss<<" -x "<<s3itersburnin<<" -y "<<s3iterssample<<" -z "<<numskip<<" "<<cpchunkcounts<<" ";
    if(old_fsmcmcoutputvec.size()==fsmcmcoutputvec.size()) ss<<old_fsmcmcoutputvec[runon]<<" ";
    ss<<fsmcmcoutputvec[runon];
    s3commands.push_back(ss.str());
    string toutfile=fsmcmcoutputvec[runon];
    toutfile.append(".log");
    if(outputlogfiles) s3logfiles.push_back(toutfile);

    if(verbose){
      cout<<"CREATING S3 CMD:"<<ss.str()<<endl;
    }
  }
  stage=3;
}

void FsProject::makefstree(){
  ensurefstree();
  s4commands.clear();
  for(int runon=0;runon<nummcmcruns;runon++){
    std::ostringstream ss;
    ss<<"fs -m T -s "<<runon*2+1;
    if(s34args.compare("")!=0) ss<<" "<<s34args;
    ss<<" -x "<<s4iters<<" "<<s4args<<" "<<cpchunkcounts<<" "<<fsmcmcoutputvec[runon]<<" "<<fstreeoutputvec[runon];
    s4commands.push_back(ss.str());
    string toutfile=fstreeoutputvec[runon];
    toutfile.append(".log");
    if(outputlogfiles) s4logfiles.push_back(toutfile);
    if(verbose){
      cout<<"CREATING S4 CMD:"<<ss.str()<<endl;
    }
  }
}

void FsProject::dofsmcmc(){
  // Some sanity checks?
  // have we run chromocombine?
  if(cval<0) {
      cerr<<"FineStructure MCMC cannot be run without a valid 'c' value. Did chromocombine run? Did it calculate 'c'?"<<endl;
      throw(runtime_error("finestructure requires -combines2 to have completed successfully"));
  }
  // 
  int cmdon=0;
  if(numthreads>0) {
    do_omp_set_num_threads(numthreads);
  }
#pragma omp parallel for
  for(int runon=0;runon<nummcmcruns;runon++){
    int thread_number = get_omp_get_thread_num();

    string tsv=s3commands[runon];
    if(thread_number>0) tsv.append(" -S");
    std::vector<char *> argv=converttoargv(tsv);
    //    string logfile=fsmcmcoutputvec[runon];
    //    logfile.append(".log");
#pragma omp atomic
    cmdon++;

#pragma omp critical
    cout<<(ParallelStream()<<"Running stage 3 (mcmc) command number (~"<<cmdon<<") of "<<nummcmcruns).toString()<<endl;
    if(verbose) cout<<"RUNNING S3 CMD:"<<tsv<<endl;


    int rv=finestructure(argv.size(),argv.data());
    freeargv(argv);

    // Check that it ran correctly
    if(rv!=0){
      cerr<<"FineStructure MCMC failed!"<<endl;// See log file ("<<logfile<<") for details."<<endl;
      throw(runtime_error("finestructure"));
      }
  }// end for

}

void FsProject::dofstree(){
  // Some sanity checks?
  
  // 
  int cmdon=0;
  if(numthreads>0) {
    do_omp_set_num_threads(numthreads);
  }
#pragma omp parallel for
  for(int runon=0;runon<nummcmcruns;runon++){
    int thread_number = get_omp_get_thread_num();

    string tsv=s4commands[runon];
    if(thread_number>0) tsv.append(" -S");
    std::vector<char *> argv=converttoargv(tsv);
    //    string logfile=fstreeoutputvec[runon];
    //    logfile.append(".log");
#pragma omp atomic
    cmdon++;

#pragma omp critical
    cout<<(ParallelStream()<<"Running stage 4 (tree) command number (~"<<cmdon<<") of "<<nummcmcruns).toString()<<endl;
    if(verbose) cout<<"RUNNING S4 CMD:"<<tsv<<endl;

    //switchStdout(logfile.c_str());

    int rv=finestructure(argv.size(),argv.data());
    freeargv(argv);
    //    revertStdout();
    // Check that it ran correctly
    if(rv!=0){
      cerr<<"FineStructure TREE failed!"<<endl;// See log file ("<<logfile<<") for details."<<endl;
      throw(runtime_error("finestructure"));
      }
}// end for

}

// Print the short options help: the general help text, the three key input
// parameters, and the first three entries of cmds.
void FsProject::optionsHelp(){
  static const char* const keypars[]={"idfile","phasefiles","recombfiles"};
  const unsigned int nkeypars=sizeof(keypars)/sizeof(keypars[0]);

  cout<<fsoptionshelp;
  cout<<"IMPORTANT PARAMETERS:"<<endl;
  for(unsigned int i=0;i<nkeypars;++i){
    cout<<cmdInfo(keypars[i],false)<<endl;
  }

  cout<<"IMPORTANT ACTIONS:"<<endl;
  int shown=0;
  while(shown<3){
    cout<<"   ";
    cout<<cmdInfo(cmds[shown].getName(),false)<<endl;
    ++shown;
  }
}

// Print a description of every file the pipeline creates, grouped by
// importance (important / useful / other).
void FsProject::outputHelp(){
  cout<<"FILES CREATED, in order of importance."<<endl<<"IMPORTANT FILES:"<<endl;
  cout<<"\t<projectname>.cp: The finestructure parameter file, containing the state of the pipeline."<<endl;
  cout<<"\t<projectname>_<linked>.chunkcounts.out: Created by stage 2 combine: The final chromopainter painting matrix, giving the number of chunks donated to individuals in rows from individuals in columns. The first line contains the estimate of \"c\"."<<endl;
  cout<<"\t<projectname>_<linked>.chunklengths.out: Created by stage 2 combine: The final chromopainter painting matrix, giving the total recombination map distance donated to individuals in rows from individuals in columns."<<endl;
  cout<<"\t<projectname>_<linked>.mcmc.xml: Created by stage 3 combine: The main MCMC file of the clustering performed by fineSTRUCTURE."<<endl;
  cout<<"\t<projectname>_<linked>.tree.xml: Created by stage 4 combine: The main \"tree\" created from the best MCMC state by fineSTRUCTURE."<<endl;
  cout<<"\t<projectname>: A folder containing all pipeline files."<<endl;
  cout<<"\t<projectname>/commandfiles/commandfile<X>.txt: The commands to be run to complete stage X. (-hpc 1 mode only)"<<endl;
  cout<<"USEFUL FILES:"<<endl;
  cout<<"\t<projectname>/stage<X>: folders containing all pipeline files for a stage X."<<endl;
  cout<<"\t<projectname>/cpbackup/<projectname>.cp<X>.bak: Backups of the parameter file, created after every action."<<endl;
  cout<<"\t<projectname>/stage1/*_EM_linked_file<f>_ind<i>.EMprobs.out: Created by stage 1: The chromopainter parameter estimate files (indexed f=1..<num_phase_files>, in the order given) for the individuals in the order encountered in the idfile (omitting individuals specified as such)."<<endl;
  cout<<"\t<projectname>/stage2/*_mainrun_file<f>_ind<i>.*: Created by stage 2: All chromopainter files created with the same parameters for all individuals (indexed f=1..<num_phase_files>, in the order given) for the individuals in the order encountered in the idfile (omitting individuals specified as such).  See the \"fs cp\" help for details."<<endl;
  cout<<"\t<projectname>/stage3/*_linked_mcmc_run<r>.xml: Created by stage 3: all further MCMC runs beyond the first (r=1..nummcmcruns-1)."<<endl;
  cout<<"\t<projectname>/stage3/*_mcmc.mcmctraces.tab: Created by stage 3 combine: The mcmc samples from all runs in a single file."<<endl;
  // NOTE(review): this stage4 entry repeats the stage3 "mcmc_run" file pattern;
  // it probably should name the tree output files - confirm against ensurefstree().
  cout<<"\t<projectname>/stage4/*_linked_mcmc_run<r>.xml: Created by stage 4: all further trees beyond the first (r=1..nummcmcruns-1)."<<endl;
  cout<<"OTHER FILES:"<<endl;
  cout<<"\t<projectname>_<linked>.mutationprobs.out: Created by stage 2 combine: The final chromopainter painting matrix, giving the *expected number of SNPs donated with error* to individuals in rows from individuals in columns."<<endl;
  cout<<"\t<projectname>_<linked>.regionchunkcounts.out: Created by stage 2 combine: an intermediate file for calculating \"c\". See fs cp help for details."<<endl;
  // chunklengths.out is already described above as a painting matrix; the second
  // intermediate file used for "c" is the region squared chunk counts file.
  cout<<"\t<projectname>_<linked>.regionsquaredchunkcounts.out: Created by stage 2 combine: an intermediate file for calculating \"c\". See fs cp help for details."<<endl;
  cout<<"\t<projectname>/stage3/*_linked_mcmc_run<r>_x<x>_y<y>_z<z>.xml: Created by stage 3 when MCMC fails convergence tests. This is a backup of where each MCMC run reached, and is used as a starting point for the next run."<<endl;
  cout<<"\t<projectname>/stage<X>/*.log: Log files created by each stage, 1,2,2a (combining stage2 output across chromosomes),3 and 4."<<endl;
}

// Print the per-stage help: a header, then for each pipeline stage the
// banner lines and the cmdInfo() text of the actions belonging to it.
void FsProject::stagesHelp(){
  // One output line per entry: either literal text, or the name of an
  // action whose cmdInfo() description is printed.
  struct HelpLine {
    bool iscmd;
    const char *text;
  };
  static const HelpLine helplines[]={
    {false,"==== pre-stage0 ===="},
    {false,"#### stage0 ####"},
    {false,"Data conversion. Currently not implemented!"},
    {false,"==== post-stage0 ===="},
    {true, "countdata"},
    {false,"==== pre-stage1 ===="},
    {false,"Important note: stage1 is skipped when running in unlinked mode (no recombination file provided)"},
    {true, "makes1"},
    {false,"#### stage1 #### Chromopainter parameter inference"},
    {true, "dos1"},
    {true, "writes1"},
    {false,"==== post-stage1 ===="},
    {true, "combines1"},
    {false,"==== pre-stage2 ===="},
    {true, "makes2"},
    {false,"#### stage2 #### Chromopainter painting"},
    {true, "dos2"},
    {true, "writes2"},
    {false,"==== post-stage2 ===="},
    {true, "combines2"},
    {false,"==== pre-stage3 ===="},
    {true, "makes3"},
    {false,"#### stage3 #### FineSTRUCTURE MCMC inference"},
    {true, "dos3"},
    {true, "writes3"},
    {false,"==== post-stage3 ===="},
    {true, "combines3"},
    {false,"==== pre-stage4 ===="},
    {true, "makes4"},
    {false,"#### stage4 #### FineSTRUCTURE tree inference"},
    {true, "dos4"},
    {true, "writes4"},
    {false,"==== post-stage4 ===="},
    {true, "combines4"},
    {false,"Not a command, but if -go gets here, we will provide the GUI command line for visualising and exploring the results."}
  };
  const unsigned int nlines=sizeof(helplines)/sizeof(helplines[0]);

  cout<<stageshelpheader;
  for(unsigned int i=0;i<nlines;++i){
    if(helplines[i].iscmd){
      cout<<cmdInfo(helplines[i].text)<<endl;
    }else{
      cout<<helplines[i].text<<endl;
    }
  }
}

// Print help for each topic in args[1..]. args[0] is skipped (offset starts at 1).
//
// The keywords "all", "actions"/"commands" and "parameters" are expanded in
// place: the keyword is removed and the matching names (from cmds / pars) are
// appended to the end of args. "input", "stages", "info", "tools", "example"
// and "output" print their dedicated help. Anything else is passed to cmdInfo().
// args is taken by value because it is consumed as a work queue.
void FsProject::getHelp(std::vector<std::string> args){
  unsigned int offset=1;
  while(args.size()>offset){
    /// Handle special cases:
    if(args[offset].compare("all")==0){ // all : do actions and parameters
      args.erase(args.begin()+offset);
      args.push_back("actions");
      args.push_back("parameters");
    }
    if(args[offset].compare("actions")==0 ||args[offset].compare("commands")==0){ // actions: get all actions
      args.erase(args.begin()+offset);
      for(unsigned int c1=0;c1<cmds.size();c1++){
        args.push_back(cmds[c1].getName());
      }
    }
    // An expansion may have appended nothing (empty cmds), leaving no topic
    // at args[offset]; stop rather than index past the end.
    if(args.size()<=offset) break;
    if(args[offset].compare("parameters")==0){ // parameters: get all parameters
      args.erase(args.begin()+offset);
      for(unsigned int c1=0;c1<pars.size();c1++){
        args.push_back(pars[c1].getName());
      }
    }
    // Same guard for an empty pars list.
    if(args.size()<=offset) break;
    // Separate helps for non-commands/parameters
    if(args[offset].compare("input")==0){ // input format help
      args.erase(args.begin()+offset);
      cout<<inputhelp0<<inputhelpidfile<<inputidfileexample<<endl;
      cout<<inputhelpphase<<inputphaseexample<<endl;
      cout<<inputhelprec<<inputrecexample<<endl<<inputhelp1;
    }else if(args[offset].compare("stages")==0){ // help on what happens in each stage
      args.erase(args.begin()+offset);
      stagesHelp();
    }else if(args[offset].compare("info")==0){
      args.erase(args.begin()+offset);
      cout<<fsprojecthelp<<endl;
    }else if(args[offset].compare("tools")==0){
      args.erase(args.begin()+offset);
      cout<<fstoolshelp<<endl;
    }else if(args[offset].compare("example")==0){ // Run through the example
      args.erase(args.begin()+offset);
      makeExample();
    }else if(args[offset].compare("output")==0){ // help on the files created
      args.erase(args.begin()+offset);
      outputHelp();
    }else{
      // Now we just give help on everything remaining
      cout<<"Help for "<<cmdInfo(args[offset])<<endl;
      args.erase(args.begin()+offset);
    }
  }
}

void FsProject::makeExample(){
  try{
    writeStringToFile(inputidfileexample,"exampledata.idfile");
    writeStringToFile(inputphaseexample,"exampledata.phase");
    writeStringToFile(inputrecexample,"exampledata.recombfile");
    cout<<"Created three example input files:\n\
exampledata.idfile : the id file\n\
exampledata.phase : the phase file\n\
exampledata.recombfile : the recombination file.\n\
To run this example, try:\n";
    cout<<exampletext;
    cout<<"Note: because this example dataset has so few snps, we have to specify the \"chunks per region\" (via -s2chunksperregion) for it to work. Also try omitting this, you should be prompted with how to fix it!"<<endl;
    cout<<"Also note that if you don\'t change the file name, e.g. fs example2.cp, fs will try to continue your previous run, which may have completed. Restart it anew with -n."<<endl;
  }catch(runtime_error& e){
    cerr<<"Error creating example files!"<<endl<<e.what()<<endl; 
    throw(runtime_error("fsproject: cannot create examples"));
  }
}

////////////////////////////////
//
// Reconcile the on-disk project file/directory with what the user asked for.
//
// readfile:          "detect" or "read" load an existing project file; any
//                    other value means an existing file is about to be overwritten.
// actionifdirexists: "stop" refuses to reuse an existing directory, "delete"
//                    removes it; any other value reuses it.
// Returns 0 on success, -1 if the project directory cannot be used, and 1 if
// readfile is "read" but the project file and directory are not both present.
int FsProject::applyFiles(string readfile,string actionifdirexists){
    int dirstate=directoryExists(dirname);
    if(dirstate<0){// is a file, we are stuck
        cerr<<"ERROR: project directory with name "<<dirname<<" exists but is not a directory. Delete or move this manually and rerun!"<<endl;
        return(-1);
    }
    if(dirstate==1){ // is a directory, probably from a previous run
        if(actionifdirexists=="stop"){
            cerr<<"ERROR: project directory with name "<<dirname<<" already exists. Delete or move this manually and rerun!"<<endl;
            return(-1);
        }
        if(actionifdirexists=="delete"){
            cerr<<"IMPORTANT: You have specified to remove the previously existing directory "<<dirname<<". This cannot be undone, and all backup information has been lost."<<endl;
            deleteFolderTree(dirname);
            dirstate=directoryExists(dirname);
        }
    }

    const bool havefile=(access( filename.c_str(), F_OK ) != -1);
    const bool havedir=(dirstate==1);

    if(havefile && havedir){ // file and directory exist
        const bool wantread=(readfile=="detect") || (readfile=="read");
        if(wantread){ // read it
            readFromFile();
            if(verbose) cout<<"Reading project file "<<filename<<"..."<<endl;
        }else{ // exists but told not to read it!
            cerr<<"WARNING: file "<<filename<<" exists but is being overwritten! You may wish to rerun without the \"-n\" option. If this command completes, you will also need to restore from the backup file. Ignore this warning if you meant to overwrite the project."<<endl;
        }
        return(0);
    }

    // No usable project on disk.
    if(readfile=="read"){  // doesn't exist but told to read it!
        cout << "Told to read a file (-n option) but this is not possible."<<endl;
        const string fexists=havefile ? "exists" : "does not exist";
        const string dexists=havedir ? "exists" : "does not exist";
        cout<<" filename " << filename<<" "<<fexists<<endl;
        cout<<" directory " << dirname<<" "<<dexists<<endl;
        return(1);
    }
    if(verbose) cout<<"Creating new project directory "<<dirname<<"..."<<endl;
    ensureDirectory(dirname);
    if(verbose) cout<<"Creating new project file "<<filename<<"..."<<endl;
    return(0);
}

int fsproject(int argc, char *argv[]) {
    unsigned int argon=0, argtmp=0;
    std::string filename=string("fsproject.cp");
    std::string dirname=string("./fsproject");
    bool verbose=false;
    int endstatus=0;
    std::string readfile=string("detect");
    std::string diraction=string("merge");
    std::vector<std::string> args(argv, argv+argc) ;
    args.erase(args.begin());


    FsProject *proj;
    try{ proj = new FsProject(filename,dirname,verbose);
    }catch (exception& e)  {
      cout << "Standard exception: " << e.what() << endl;
      return(1);
    }

    // Check for general options
    // Check if there are no options
    if(args.size()==0){
      proj->optionsHelp();return 0;
    }
    
    // check for, and read the filename 
    if(args[0].substr(0,1).compare("-")!=0){ // no leading \"-\"
      filename=string(args[0]);
      dirname=projectroot(filename);
      filename=projectfull(dirname);
      if(args[0].compare(filename)!=0){
	cout<<"WARNING: you have not included the \".cp\" ending in the project name. This can cause problems if you try to call it the same name as a fs tool.\n";
      }
      if(verbose) cout<<"Using filename "<<filename<<" with directory "<<dirname<<endl;
      //interpret as a filename
      args.erase(args.begin());
    }else if((args[0].compare("-help")!=0 && args[0].compare("-h")!=0)){
      cout<<"Must provide the filename <projectname>.cp before any actions or parameter settting. See \"fs -h\" for help on this mode, or \"fs\" for general help."<<endl;
      return 0;
    }

    // Process the remaining parameters
    argon=0;
    while(argon<args.size()) {
      argtmp=argon;
      if(args[argon].compare("-h")==0 || args[0].compare("-help")==0){ // help!
	std::vector<std::string> args1(args.begin()+argon, args.begin()+args.size()) ; // the arguments of the command
	if(args1.size()==1) proj->optionsHelp();
	else proj->getHelp(args1); 
	return 0;
      }else if(args[argon].compare("-v")==0) {	// verbose
	verbose=true;
	cout<<"Verbose mode"<<endl;
	args.erase(args.begin() + argon);
      }else if(args[argon].compare("-n")==0) {	// new file
	readfile=string("new");
	diraction=string("merge");
	args.erase(args.begin() + argon);
      }else if(args[argon].compare("-N")==0) {	// new file
	readfile=string("new");
	diraction=string("delete");
	args.erase(args.begin() + argon);
      }else{
	argon++;
      }
    }


    ///////////////////////////
    // set up project structure
    if(verbose) cout<<"Using directory "<<dirname<<" and file "<<filename<<endl;
    proj->setFileName(filename);
    proj->setDirectoryName(dirname);
    proj->setVerbose(verbose);

    ///////////////////////////
    // read file if appropriate
    int fval=proj->applyFiles(readfile,diraction);
    if(fval==1) return(fval);
    proj->addHistory(args);
    ////////////////////////////

    if(verbose) cout<<"Processing commands..."<<endl;
    argon=0;
    argtmp=0;
    int cmdon=0;
    int lastcmdon=cmdon;
    while(argon<args.size()){
      if(verbose) cout<<"Processing command argument "<<cmdon<<":";
      if(verbose) cout<<" \""<<args[argon];
      argtmp=argon+1;
      while(argtmp<args.size() && args[argtmp].at(0)!='-') {
	if(verbose) cout<<"\" \""<<args[argtmp];
	argtmp++;
      }
      if(verbose) cout<<"\""<<endl;
      std::vector<std::string> args1(args.begin()+argon, args.begin()+argtmp) ; // the arguments of the command
      args1 = getcommands(args1);/// extract the arguments split by comma
      try{
	proj->docmd(args1); // do the command
	argon=argtmp;
	cmdon++;
      }catch (runtime_error& e)  {
	string swhat=e.what();
	char * swhat_c=(char*) swhat.data();
	char * tmissing;
	tmissing = strstr (swhat_c,"missing");
	endstatus=-1;
	if ((proj->getHpc()==1)&&(tmissing!=NULL)) {
	  int stage=0;
	  string tcmdfile;
	  if(strstr (swhat_c,"stage1")!=NULL){
	    stage=1;
	  }else if(strstr (swhat_c,"stage2")!=NULL){
	    stage=2;
	  }else if(strstr (swhat_c,"stage3")!=NULL){
	    stage=3;
 	  }else if(strstr (swhat_c,"stage4")!=NULL){
	    stage=4;
	  }else{
	    cerr << "ERROR: chromopainter failed. Check the input files and logs ... saving progress so far." <<endl;
	    argon=args.size();
	    break;
	  }
	  // If we get here, it is a valid hpc escape
	  cmdon++;
	  argon=argtmp;
	  endstatus=1;
	  tcmdfile=proj->getCommandFile();
	  cout<<"HPC mode: "<<proj->getCommandFileCount()<<" commands for stage"<<stage<<" written to file "<<tcmdfile<<". Rerun when those commands have been completed and the results copied back to this directory."<<endl;
	  cout<<"SUGGESTIONS:"<<endl<<"> cat "<<tcmdfile<<" | parallel # for local execution in parallel"<<endl;
	  cout<<"> qsub_run.sh -f "<<tcmdfile<<" -n "<<proj->recommendN()<<" -m "<<proj->recommendM()<<" # for qsub HPC systems -n <n> is the number of cores requested on each HPC node and -m <m> is the number of commands sent to each node (not core). These values are suggested to keep the total number of qsub jobs low ~(10-100)."<<endl;
	}else if (swhat.compare("chromopainter")==0){
	  cerr << "ERROR: chromopainter failed. Check the input files and logs ... saving progress so far." <<endl;
	}else if (swhat.compare("chromocombine")==0){
	  cerr << "ERROR: chromocombine failed. Check the input files and logs ... saving progress so far." <<endl;
	}else{
	  cerr << "ERROR: Invalid command: "<<e.what()<<" ... saving progress so far." <<endl;
	}
      }catch (exception& e)  {
	cerr << "ERROR: Failed to perform command:";
	for(unsigned int i=0;i<args1.size();i++){
	  cerr<<" "<<args1[i];
	}
	cerr<<endl;
	cerr << "ERROR: Exception: " << e.what() << endl <<"... saving progress so far."<<endl;
	cerr << "Saving progress so far."<<endl;
	argon=args.size();
	break;
      }catch(...) {
	cerr<<"ERROR! Unknown error."<<endl;
      }
      if(lastcmdon==cmdon){
	cerr<<"ERROR: Commands failed to process."<<endl;
	argon=args.size();
      }else lastcmdon=cmdon;
    }

    //////////////////////////////
    /*
    if(verbose) cout<<"Configuring project file..."<<endl;
    try{ 
      proj->safeCreateFile();
      if(verbose) cout<<"Writing project file..."<<endl;
      proj->writeToFile();
    }catch (exception& e)  {
      cout << "Standard exception: " << e.what() << endl;
    }
    */
    if(endstatus<0) {
      cout<<"IMPORTANT: The run ended on an error. You should read the error above and correct it."<<endl;
    }else if(endstatus>0){
      string tcmdfile=proj->getCommandFile();
      cout<<"IMPORTANT: The run ended with a requirement to run commands externally from file \""<<tcmdfile<<"\". Once you have done this, resume the analysis with \"fs "<<filename<<" -go\""<<endl;      
    }
    delete(proj);
    return(0);
}
