#ifndef FSPROJECT_H
#define FSPROJECT_H
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
#include <stdlib.h>
#include <fstream>
#include <string.h>

#include "fscmds.h"
#include "fssettings.h"
#include "fsparam.h"
//#include "finestructure/fsxml.h"

using namespace std;
using namespace fines;

/**
    @brief Project mode: the settings, parameters, commands and per-stage
    state of a single project, backed by a project file and a directory.

    Only declarations are visible in this header; the member functions are
    defined elsewhere.
*/
class FsProject
{
 public:
  FsProject(std::string f,std::string d,bool verbose);///< Create from a file (f and d are presumably the file and directory names, as in setFileName/setDirectoryName -- confirm)
  ~FsProject();///< Destructor

  void setupSections();///< Setup the file structure for the project (comments and freetext comments)
  void addHistory(std::vector<std::string> args);///< Adds the command with all arguments to the history file
  void defineParameters();///< Defines the parameters we can set
  void defineCommands();///< Defines the commands we can run
  void readFromFile();///< Read the project from the file
  bool applyVal(FsSettingsValue val);///< Read a specific value from file
  bool applyVal(std::vector<std::string> args);///< Overload of applyVal taking a vector of strings. NOTE(review): argument layout and meaning of the return value are not documented here -- confirm against the definition

  void writeToFile();///< Write the project to the file
  void safeCreateFile();///< Create the project file, moving files of the same name out of the way
  void setFileName(std::string f);///< Set the filename of the project file
  void setDirectoryName(std::string d);///< Set the directory name used by the project (counterpart of getDirectoryName)
  void setVerbose(bool verbose);///< Set the verbosity level
  std::string getFileName();///< obtain the filename being used
  std::string getDirectoryName();///< obtain the directory being used

  int defaultChunksperregion();///< Default number of chunks per region
  void copyfile(std::string from, std::string to); ///< Copy a file
  void resetToStage(int newstage);///< Reset settings to a stage by removing some settings from future stages
  void createDuplicated(int newstage, string newname);///< Duplicate the appropriate parts of the file structures to enable everything that can be reused, to be reused.
  int applyFiles(string readfile,string actionifdirexists=string("merge"));///< Create directories and read files as appropriate to "readfile" (either "new","read", or "detect"). If it exists we either "merge", "delete" or "stop"
  bool haveOutput(int stage,bool forcombined=false);///< check that we have the output we expect from a stage
  string whichLinkagemode();///< Which linkage mode should we use?
  void countData();///< Count and simply check the datasets we've been given
  
  bool canDo(string cmd);///< check dependencies for a command
  int parIndex(std::string partest);///< get the index of a parameter (-1 for missing)
  int cmdIndex(std::string cmdtest);///< get the index of a command (-1 for missing)
  std::string cmdInfo(std::string cmd, bool statetype=true);///< return the information/help for a command
  std::vector<std::string> getDependencies(std::vector<std::string> args);///< Gets the previous command that needs to be run
  bool checkArgs(std::vector<std::string> args,int minargs,int maxargs=-1);///< Checks that the command described by args has between minargs and maxargs arguments. maxargs=-1 means maxargs=minargs, -2 means unbounded
  int checkStage(std::string args,std::string val);///< return the stage for a *parameter*. Return 0 on a problem and 1 on OK. NOTE(review): "the stage" and "0/1" describe different return values -- confirm which one the definition implements
  void checkStage(std::string args,int maxstage);///< Check that args is allowed to run at the current stage
  void checkStage(std::vector<std::string> args,int maxstage);///< Check that args[0] is allowed to run at the current stage
  void docmd(std::vector<std::string> args);///< do a command line command
  std::vector<char *> converttoargv(std::string s); ///< Convert a string into a argv
  void freeargv(std::vector<char *> ccmd); ///< Free the argv array
  
  void createIdFile(string idfile);///< Creates an ID file from the first stage file

  bool finishedStage(int stage);///< check if we have finished a stage

  void ensures1root(); ///< Makes sure we have a stage 1 root file name
  void ensures2root(); ///< Makes sure we have a stage 2 root file name
  void ensures2aroot(); ///< Makes sure we have a stage 2a root file name
  void ensureCommandfile(int forstage); ///< Makes sure we have a command file name for stage forstage. NOTE(review): inferred from the function and parameter names -- confirm against the definition

  std::string makeoutfileroot(string root,int fileon,int indstart,int indend,int forstage);///< Construct the output file from the run details

  void matchCpInputs();///< Check that we have valid input files for cp 
  int getNhapsFromFile(int which);///< Extract the number of haps from a stage file
  int getNsnpsFromFile(unsigned int fileno);///< Extract the number of SNPs from a file
  int getNindsFromFile(bool keepall=true);///< count the number of recipients to process
  int getUniqueNindsFromFile(bool keepall=true);///< count the number of unique recipients to process
  void makeStage1();///< Make the commands to be run in stage1
  void makeStage2();///< Make the commands to be run in stage2

  void doCpStage(int stage);///< Run the commands to be run in stage1/2
  void combineStage1();///< Combine the outputs of stage1 to estimate parameters
  void combineStage2();///< Do chromocombine
  void combineStage3();///< Do MCMC validation
  void combineStage4();///< Do tree validation
  void writeHpcStage3(string cmdfile);///< Do the mcmc in an iterative manner
  
  void addEMline(string line, vector<double> *Nevec_ptr, vector<double> *muvec_ptr); ///< add the EM line to Ne and mu
  void addEMobs(string filename, vector<double> *Nevec_ptr, vector<double> *muvec_ptr);///< Extract all the final EM lines from Ne and mu
  void writeStringVectorToFile(std::vector<std::string> sv,std::string fn,std::vector<std::string> logfiles, bool addfs=true);///< write a string vector to file (optionally adding "fs " to the start)
  void writeStringToFile(std::string s,std::string fn);///< write a string to file

  ////////////////////////////////////
  // OpenMP wrappers
  void do_omp_set_num_threads(int numthreads);///< Set the number of threads, if omp is available.
  int get_omp_get_thread_num(); ///< Get the number of this thread, if running parallel (0 else)
  int get_omp_get_num_threads(); ///< Get the number of threads available, if running parallel (0 else)

  //////////////////////////////////// 
  // finestructure (fs) mcmc and tree steps
  void continuefsmcmc();///< rerun the mcmc, continuing where we left off
  void ignoreGRfsmcmc();///< restore a previous MCMC and set the GR threshold to ignore any problems with it
  void ensurefsroot();///< Make sure we have a finestructure root
  void ensurefsmcmc();///< Make sure we have a finestructure file name
  void ensurefstree();///< Make sure we have a finestructure tree file name
  void makefsmcmc();///< Make the fs mcmc command lines
  void makefstree();///< Make the fs tree command lines
  void dofsmcmc();///< Do the fs mcmc
  void dofstree();///< Do the fs tree

  ////////////////////////////////////
  // stdout redirection, run-mode queries, help and diagnostics
  void switchStdout(const char *newStream);///< switch stdout to newstream
  void revertStdout();///< Revert Stdout to console
  int getHpc();///< Whether or not we are in hpc mode
  int getIndsPerProc();///< Figure out how many inds to use per process
  string getCommandFile(int stage=-1);///< Command file name for a specified (-1: or current) stage
  int getCommandFileCount(int stage=-1);///< Command file count for a specified (-1: or current) stage
  void optionsHelp();///< Default help message about automatic mode
  void outputHelp();///< Help message about the created files
  void stagesHelp();///< Help message about the various computational stages
  void getHelp(std::vector<std::string> args);///< Complex help on particular topics
  void makeExample();///< Create walkthrough

  bool readmcmctraces(int filenum);///< Read the MCMC traces for a particular run number, stored into a temporary (unsaved) object
  bool writemcmctraces(std::string filename);///< Write the MCMC traces to file
  bool mcmcConvergence(); ///< Obtain MCMC convergence diagnostics from the mcmc output, returning 1 if it is OK to proceed, 0 if we are concerned
  double mcmcGRstatistic(std::vector<std::vector<double> > *data); ///< Compute the Gelman Rubin potential scale reduction factor statistic for a set of runs, for a parameter
  int recommendN();///< Recommend the number of processes per node
  int recommendM();///< Recommend the number of commands per node
 protected:
  ////////////////////////////////////////////
  // Metadata 
  unsigned int nstages;///< The number of stages we store
  bool restorestage;///< Whether we are restoring the stage
  bool allowdep;///< Whether we allow dependencies to be autoresolved
  bool verbose; ///< Whether we are in verbose mode
  std::string filename;///< presumably the project file name handled by setFileName/getFileName -- confirm
  std::string dirname;///< presumably the directory name handled by setDirectoryName/getDirectoryName -- confirm
  std::string fileroot;///< NOTE(review): undocumented; how it relates to filename/dirname is not visible in this header
  std::vector<std::string> sectionnames; ///< Names of sections
  std::vector<std::string> sectioncomments; ///< Comments of sections (presumably one per entry of sectionnames -- confirm)
  std::vector<FsPar> pars; ///< The parameters (FsPar objects) we can set
  std::vector<int> parsize;///< Maximum index of parameters in each section
  std::vector<FsCmd> cmds; ///< The commands (FsCmd objects) we can run

  std::vector<std::string> freetextcomments; ///< Comments between tags
  std::string historytext; ///< history

  ////////////////////////////////////////////
  // data in computer processed form
  // processing files (not stored in the settings file)
  std::string s1commandfile;///< Where we store stage 1 commands
  std::string s2commandfile;///< Where we store stage 2 commands
  std::string s3commandfile;///< Where we store stage 3 commands
  std::string s4commandfile;///< Where we store stage 4 commands

  std::vector<std::string> s1commands; ///< commands we construct for stage 1
  std::vector<std::string> s2commands; ///< commands we construct for stage 2
  std::vector<std::string> s3commands; ///< commands we construct for stage 3
  std::vector<std::string> s4commands; ///< commands we construct for stage 4

  std::vector<std::string> s1logfiles; ///< the stage 1 logfile locations to be written to file
  std::vector<std::string> s2logfiles; ///< the stage 2 logfile locations to be written to file
  std::vector<std::string> s3logfiles; ///< the stage 3 logfile locations to be written to file
  std::vector<std::string> s4logfiles; ///< the stage 4 logfile locations to be written to file

  // for storing and restoring stdout
  int stdout_fd; ///< file descriptor kept for storing/restoring stdout (presumably used by switchStdout/revertStdout -- confirm)
  fpos_t stdout_pos;///< stream position kept for storing/restoring stdout. NOTE(review): fpos_t is declared in <stdio.h>, which this header does not include directly -- presumably it arrives transitively; confirm


  ////////////////////////////////////////////
  // universal properties
  int stage;///< the stage that we are currently processing.
  string fsfile; ///< file root used for storing directories and the fs details
  int hpc; ///< HPC mode
  int ploidy; ///< haploid or diploid mode
  int numthreads; ///< Number of threads

  int indsperproc; ///< how we split up the individuals; default is 1, meaning everyone is processed in a different command  
  string linkagemode; ///< Whether we use linked or unlinked mode (or default: autodetect)
  bool outputlogfiles;///< Whether the redirection to log files are included in the command lists
  std::string exec; ///< the full path of this program, including the right ending (for specifying versions)
  std::vector<int> validatedoutput; ///< Whether we have validated the output of each stage

  // STAGE0 OUTPUT / STAGE 1 PROCESSING AND INPUT
  
  ////////////////////////////////////////////
  // Stage12 UNIVERSAL QUANTITIES
  std::string s12inputtype; ///< type of input files
  std::vector<std::string> phasefiles; ///< input phase files
  std::vector<std::string> recombfiles; ///< input recombination files
  std::string idfile; ///< input ID file
  std::string s12donorfile; ///< input donor file
  std::string s12args; ///< input arguments
  int ninds;///< Number of individuals for which we have data
  int nindsUsed;///< Number of individuals to be used from the data
  int nsnps; ///< Number of SNPs found in total
  vector<int> nsnpsvec;///< Number of SNPs in each file

  ////////////////////////////////////////////
  // STAGE1 OUTPUT / STAGE 1a INPUT
  std::string s1args; ///< stage 1 arguments
  int s1emits;///< number of em iterations 
  std::string s1outputroot;///< Base output root
  std::vector<std::string> s1outputrootvec; ///< output files
  int s1minsnps;///< Minimum number of loci extracted for an EM block
  double s1snpfrac;///< `Fraction' of SNPs to process
  double s1indfrac;///< `Fraction' of INDS to process
  // STAGE1a OUTPUT
  
  ////////////////////////////////////////////
  // STAGE2 OUTPUT / STAGE 2a INPUT
  double Neinf; ///< Ne as we infer it from stage1
  double muinf; ///< mu as we infer it from stage1
  int s2chunksperregion;///< Number of chunks to be used to define a region
  int s2samples;///< Number of samples of the painting to obtain per recipient haplotype (default to zero, i.e. don't obtain these)
  std::string s2args; ///< input arguments
  std::string s2outputroot;///< Base output root
  std::vector<std::string> s2outputrootvec; ///< output files
  std::string s2combineargs;///< stage 2combine arguments

  ////////////////////////////////////////////
  // STAGE 3 AND 4 PARAMS

  // STAGE2a OUTPUT
  double cval; ///< The inferred value of "c"
  std::string cproot; ///< combined output root
  std::string cpchunkcounts; ///< chromopainter final chunkcount file

  // STAGE3-4 generic properties
  string s34args;///< presumably arguments shared by stages 3 and 4 -- confirm
  std::string fsroot; ///< finestructure output root

  // STAGE 3 PARAMS and OUTPUT (finestructure)
  static const int numitersDefault=100000;///< presumably the default for s3iters -- confirm
  int s3iters; ///< User interface: how many iterations to do. By default, half are assigned to burnin, half to mcmc
  int s3iterssample; ///< -x; negative for autocalculate from s3iters
  int s3itersburnin; ///< -y; negative for autocalculate from s3iters
  int numskip; ///< -z; negative for autocalculate from s3iters
  int maxretained; ///< for calculating -z
  int nummcmcruns; ///< number of mcmc runs performed

  std::string fsmcmcoutput; ///< fs name
  std::vector<std::string> fsmcmcoutputvec; ///< fs mcmc files
  std::vector<double> mcmcGR;///< The Gelman Rubin statistics computed from mcmc files
  double threshGR; ///< Threshold for rejecting convergence

  // STAGE 4 PARAMS and OUTPUT (finestructure tree)

  string s4args;///< presumably the stage 4 arguments (cf. s1args) -- confirm
  int s4iters; ///< -x; negative for autocalculate from s3iters
  std::string fstreeoutput; ///< fs name
  std::vector<std::string> fstreeoutputvec; ///< fs tree files

  // Temporary objects for calculations
  // NOTE(review): presumably the unsaved MCMC trace storage that readmcmctraces
  // fills, one inner vector per run -- confirm against the definition.
  std::vector<std::vector<double> > mcmc_posterior;
  std::vector<std::vector<double> > mcmc_k;
  std::vector<std::vector<double> > mcmc_beta;
  std::vector<std::vector<double> > mcmc_delta;
  std::vector<std::vector<double> > mcmc_f;

  // Keeping track of old MCMC runs
  std::vector<std::string> old_fsmcmcoutputvec; ///< fs mcmc files from old runs
  int fscounter;///< NOTE(review): undocumented; presumably counts MCMC (re)run attempts -- confirm
  int fsmaxattempts;///< NOTE(review): undocumented; presumably the upper bound on fscounter -- confirm
};

int fsproject(int argc, char *argv[]); ///< Free-function entry point taking main()-style arguments; defined elsewhere. NOTE(review): presumably drives an FsProject and returns an exit status -- confirm against the definition

#endif
