/*
(c) GAFit toolkit $Id: common.c 558 2025-11-30 01:00:26Z ro $
*/
#if HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>
#include <math.h>
#include <fcntl.h>


#include "../inputline/line.h"
#include "../flyctl/flyctl.h"
#include "../environ/environ.h"
#include "../nullist/nllist.h"
#include "../rstrings/rstrings.h"
#include "../rangecf/rangecf.h"
#include "common.h"

// Debug flag; not read anywhere in the visible part of this file.
int debug = 0;

// Configuration globals. All of them start out unset and are assigned by
// InitEnvironmentVariables () from SetEnv (name, default) calls.
char *ExternalInput = NULL;
char *ExternalFit = NULL;
char *Bounds = NULL;
char *InputTemplate = NULL;
char *ExternalExecutable = NULL;
// Number of parallel calculations; InitEnvironmentVariables () forces it to
// be at least 1.
int ParallelCalcs = 1;
char *ReferenceValues = NULL;
// Obtained with ReferenceValues as its default value.
char *RefValuesFile = NULL;
char *RawFitsFile = NULL;
char *RawResultsFile = NULL;
// Defined in another translation unit. NOTE(review): SplitTemplates ()
// declares a local array with the same name, which shadows this one there.
extern char **white;

/*
 * Assign every configuration global of this module from SetEnv ().
 *
 * Each SetEnv () call receives a variable name (STR_*) and a default value.
 * The parallel-calculation count is converted with atoi () and any result
 * that is not strictly positive is replaced by 1.  RefValuesFile is looked
 * up last because its default is the ReferenceValues value obtained just
 * before it.
 */
void
InitEnvironmentVariables (void)
{
  ExternalInput = SetEnv (STR_EXTERNAL_INPUT, EXTERNAL_INPUT);
  ExternalFit = SetEnv (STR_EXTERNAL_FIT, EXTERNAL_FIT);
  Bounds = SetEnv (STR_BOUNDS_FILE, BOUNDS_FILE);
  InputTemplate = SetEnv (STR_INPUT_TEMPLATE, INPUT_TEMPLATE);
  ExternalExecutable = SetEnv (STR_EXTERNAL_EXECUTABLE, EXTERNAL_EXECUTABLE);

  int requested = atoi (SetEnv (STR_PARALLEL_CALCS, PARALLEL_CALCS));
  ParallelCalcs = (requested > 0) ? requested : 1;

  ReferenceValues = SetEnv (STR_REF_VALUES, REFERENCE_VALUES);
  RawFitsFile = SetEnv (STR_RAW_FITS, RAW_FITS);
  RawResultsFile = SetEnv (STR_RAW_RESULTS, RAW_RESULTS);
  // default for the file name is the value just stored in ReferenceValues
  RefValuesFile = SetEnv (STR_REF_VAL_FILE, ReferenceValues);
}

/*
 * Write `content` to the file named "<file>.<templateName>".
 *
 * The target name is assembled in a MAXSTR-byte buffer with snprintf (), so
 * an over-long file/template pair is reported instead of overflowing the
 * buffer.  Failures to open, write or close the file are reported through
 * fcPrintfStopIt ().
 *
 * Returns 1 when the file was written, 0 otherwise (only reachable if
 * fcPrintfStopIt () returns).
 */
int
WriteInputFile (char *content, char *file, char *templateName)
{
  char name[MAXSTR];
  int len = snprintf (name, sizeof name, "%s.%s", file, templateName);

  if (len < 0 || (size_t) len >= sizeof name)
    {
      // the name did not fit: refuse rather than write to a truncated path
      fcPrintfStopIt ("cannot write file %s.%s\n", file, templateName);
      return 0;
    }

  FILE *w = fopen (name, "w");
  if (w == NULL)
    {
      fcPrintfStopIt ("cannot write file %s\n", name);
      return 0;
    }

  int ok = (fputs (content, w) != EOF);
  // fclose flushes the stream, so a full disk may only show up here
  if (fclose (w) != 0)
    ok = 0;
  if (!ok)
    {
      fcPrintfStopIt ("cannot write file %s\n", name);
      return 0;
    }
  return 1;
}

/*
 * Turn a blank/tab separated list of template names into a list built by
 * nllListParser ().
 *
 * The input string is not modified: a strdup () copy is trimmed with
 * sLtrim ()/sRtrim (), parsed using " " and "\t" as separators, and the
 * result is then passed through nllDelete () with the same separator set.
 * The trimmed copy is freed before returning; the caller owns the returned
 * list (MergeTemplates () releases it with nllClear ()).
 */
char **
SplitTemplates (char *list)
{
  char *separators[] = { " ", "\t", NULL };
  char *copy = strdup (list);
  char *trimmed = sRtrim (sLtrim (copy));
  char **names = nllListParser (trimmed, separators);

  names = nllDelete (names, separators);
  free (trimmed);
  return names;
}

/*
 * Read the template file `name` and return its text as produced by
 * InputFileText ().  A file that cannot be opened is reported through
 * fcPrintfStopIt ().  MergeTemplates () releases the returned text with
 * free ().
 */
char *
template2Txt (char *name)
{
  FILE *in = fopen (name, "r");
  if (in == NULL)
    fcPrintfStopIt ("template %s not found.", name);

  char *contents = InputFileText (in);
  fclose (in);
  return contents;
}

/*
 * Create (or truncate) the file `name` and store the string `txt` in it,
 * without the terminating NUL.  A file that cannot be opened is reported
 * through fcPrintfStopIt ().
 */
void
txt2Template (char *name, char *txt)
{
  FILE *out = fopen (name, "w");
  if (out == NULL)
    fcPrintfStopIt ("cannot write %s\n", name);

  size_t nbytes = strlen (txt);
  fwrite (txt, nbytes, 1, out);	// the whole text as a single item
  fclose (out);
}

/*
 * Concatenate, in order, the contents of every template named in `list`
 * (a blank/tab separated list) and write the result to MERGED_TEMPLATES.
 *
 * Returns MERGED_TEMPLATES, or NULL if memory ran out and
 * fcPrintfStopIt () returned.
 *
 * The running length is tracked so each template is appended with a single
 * memcpy () instead of strcat () rescanning the whole buffer, and the
 * realloc () result is checked before the old pointer is replaced.
 */
char *
MergeTemplates (char *list)
{
  char **names = SplitTemplates (list);
  int count = nllCount (names);
  char *allText = template2Txt (nllString (names, 0));
  size_t used = strlen (allText);

  for (int i = 1; i < count; i++)
    {
      char *t = template2Txt (nllString (names, i));
      size_t extra = strlen (t);
      char *grown = realloc (allText, used + extra + 1);
      if (grown == NULL)
	{
	  // allText is still valid here: release everything before stopping
	  free (t);
	  free (allText);
	  nllClear (names);
	  fcPrintfStopIt ("out of memory merging templates\n");
	  return NULL;
	}
      allText = grown;
      memcpy (allText + used, t, extra + 1);	// copies the NUL as well
      used += extra;
      free (t);
    }
  txt2Template (MERGED_TEMPLATES, allText);
  free (allText);
  nllClear (names);
  return MERGED_TEMPLATES;
}


/*
 * Release one reference value: its name buffer and the structure itself.
 * Passing NULL is allowed and does nothing, mirroring free (NULL).
 */
void
rfClear (REFERENCEVALUES *rf)
{
  if (rf == NULL)
    return;
  free (rf->name);
  free (rf);
}

/*
 * Release an array of `n` reference values: each element is handed to
 * rfClear () and the pointer array itself is then freed.
 */
void
refsClear (REFERENCEVALUES **rf, int n)
{
  for (int k = 0; k < n; k++)
    rfClear (rf[k]);
  free (rf);
}

/*
 * Read the next reference value from `f`.
 *
 * Lines holding only blanks/tabs before the end-of-line are skipped.  The
 * first other line is parsed as "<value> <weight> [<name>]"; the name is
 * limited to 20 characters.  A weight of 0 (also the result of a missing
 * weight) is replaced by 1.
 *
 * All three fields are given defaults before parsing (0, 0 and the empty
 * string), so a line with fewer fields never leaves them uninitialised; a
 * line with fewer than two numbers is additionally reported on stdout.
 *
 * Returns a newly allocated REFERENCEVALUES (release with rfClear ()), or
 * NULL at end of file.
 */
REFERENCEVALUES *
getRef (FILE *f)
{
  char line[MAXSTR + 1];
  REFERENCEVALUES *rf = malloc (sizeof (REFERENCEVALUES));

  if (rf == NULL)
    {
      fcPrintfStopIt ("out of memory reading reference values\n");
      return NULL;
    }
  rf->name = malloc (21);	// 20 characters ("%20s") plus the NUL
  if (rf->name == NULL)
    {
      free (rf);
      fcPrintfStopIt ("out of memory reading reference values\n");
      return NULL;
    }

  while (fgets (line, MAXSTR, f) != NULL)
    {
      char *p = line;
      while (*p == ' ' || *p == '\t')
	p++;
      if (*p == '\r' || *p == '\n')
	continue;		// blank line

      rf->value = 0;
      rf->weight = 0;
      rf->name[0] = '\0';
      int n = sscanf (line, "%lf %lf %20s", &(rf->value), &(rf->weight),
		      rf->name);
      if (n < 2)
	printf ("Error: reference values short read\n");
      if (rf->weight == 0)
	rf->weight = 1;
      return rf;
    }

  // end of file without another data line
  rfClear (rf);
  return NULL;
}

/*
 * Read the next number from `f` into *number.
 *
 * Lines holding only blanks/tabs before the end-of-line are skipped; the
 * first other line is scanned with "%lf".
 *
 * Returns 1 once such a line has been consumed, 0 at end of file.
 * NOTE(review): the sscanf () result is not examined, so 1 is returned and
 * *number is left untouched when the line does not start with a number.
 */
int
getDouble (FILE *f, double *number)
{
  char line[MAXSTR + 1];

  for (;;)
    {
      if (fgets (line, MAXSTR, f) == NULL)
	return 0;

      const char *first = line + strspn (line, " \t");
      if (*first == '\r' || *first == '\n')
	continue;		// blank line, try the next one

      sscanf (line, "%lf", number);
      return 1;
    }
}

/*
 * Read every number of the text file `fname` (one per non-blank line, see
 * getDouble ()) into a newly allocated array.
 *
 * *n receives the number of elements.  Returns the array (caller frees), or
 * NULL when the file holds no numbers.  A file that cannot be opened and
 * memory exhaustion are reported through fcPrintfStopIt (); if that
 * returns, the result is NULL with *n == 0.
 */
double *
dlist (char *fname, int *n)
{
  double *d = NULL;
  // defined start value: getDouble () may leave it untouched on a bad line
  double number = 0.0;

  *n = 0;
  FILE *r = fopen (fname, "r");
  if (r == NULL)
    {
      fcPrintfStopIt ("Cannot open %s\n", fname);
      return NULL;
    }

  while (getDouble (r, &number))
    {
      double *grown = realloc (d, ((size_t) *n + 1) * sizeof (double));
      if (grown == NULL)
	{
	  // d is still valid after a failed realloc: release it explicitly
	  free (d);
	  fclose (r);
	  *n = 0;
	  fcPrintfStopIt ("out of memory reading %s\n", fname);
	  return NULL;
	}
      d = grown;
      d[(*n)++] = number;
    }
  fclose (r);
  return d;
}

/*
 * Read every reference value of `fname` (see getRef ()) into a newly
 * allocated array of pointers.
 *
 * *n receives the number of entries.  Returns the array (release with
 * refsClear ()), or NULL with *n == 0 when the file cannot be opened or
 * holds no entries; an unreadable file is not reported here.
 *
 * The file is closed once reading finishes, and the array is grown through
 * a temporary pointer so a failed realloc () does not lose the entries
 * already read.
 */
REFERENCEVALUES **
rvlist (char *fname, int *n)
{
  REFERENCEVALUES **rlist = NULL;
  REFERENCEVALUES *rf;
  int nn = 0;

  FILE *r = fopen (fname, "r");
  if (r != NULL)
    {
      while ((rf = getRef (r)) != NULL)
	{
	  REFERENCEVALUES **grown =
	    realloc (rlist, ((size_t) nn + 1) * sizeof (REFERENCEVALUES *));
	  if (grown == NULL)
	    {
	      rfClear (rf);
	      fcPrintfStopIt ("out of memory reading %s\n", fname);
	      break;		// keep what was read so far
	    }
	  rlist = grown;
	  rlist[nn++] = rf;
	}
      fclose (r);
    }
  *n = nn;
  return rlist;
}

/*
 * Load the reference values stored in `fname`; a plain forwarder to
 * rvlist (), which stores the entry count in *n.
 */
REFERENCEVALUES **
getNRefs (char *fname, int *n)
{
  return rvlist (fname, n);
}

/*
 * Store one double at slot `pos` of the existing binary file `file`.
 *
 * The file is opened write-only (it is not created here), positioned at
 * pos * sizeof (double), the slot is locked with lockf (F_LOCK) and the
 * value is written.  Closing the descriptor releases the lock.
 *
 * Every failure (open, seek, lock, write) is reported through
 * fcPrintfStopIt () together with the file name.
 */
void
WriteFit (char *file, int pos, double value)
{
  int fn = open (file, O_WRONLY);
  if (fn < 0)
    fcPrintfStopIt ("cannot open rawfit:%s\n", file);

  // offset computed in off_t so it is not truncated or wrapped
  off_t where = (off_t) sizeof (double) * pos;
  if (lseek (fn, where, SEEK_SET) == (off_t) -1)
    fcPrintfStopIt ("cannot seek rawfit:%s\n", file);

  if (lockf (fn, F_LOCK, (off_t) sizeof (double)) != 0)
    fcPrintfStopIt ("cannot lock rawfit:%s\n", file);

  if (write (fn, &value, sizeof (double)) != (ssize_t) sizeof (double))
    fcPrintfStopIt ("cannot write rawfit:%s\n", file);

  close (fn);
}

/*
 * Store a row of `n` doubles at row `pos` of the existing binary file
 * `file`, i.e. at byte offset pos * n * sizeof (double).
 *
 * The file is opened write-only (it is not created here), the row is locked
 * with lockf (F_LOCK) and then written.  Closing the descriptor releases
 * the lock.
 *
 * Every failure (open, seek, lock, short write) is reported through
 * fcPrintfStopIt () together with the file name.
 */
void
WriteResults (char *file, int pos, int n, double *value)
{
  int fn = open (file, O_WRONLY);
  if (fn < 0)
    fcPrintfStopIt ("cannot open rawresults:%s\n", file);

  // offsets and sizes computed in off_t so large tables do not overflow int
  off_t rowBytes = (off_t) sizeof (double) * n;
  if (lseek (fn, rowBytes * pos, SEEK_SET) == (off_t) -1)
    fcPrintfStopIt ("cannot seek rawresults:%s\n", file);

  if (lockf (fn, F_LOCK, rowBytes) != 0)
    fcPrintfStopIt ("cannot lock rawresults:%s\n", file);

  if (write (fn, value, sizeof (double) * n) != (ssize_t) rowBytes)
    fcPrintfStopIt ("cannot write rawresults:%s\n", file);

  close (fn);
}

/**
 Read a binary results file into a single vector of n x m doubles.

 The buffer is allocated here and returned to the caller, who must free it.
 A file that cannot be opened is reported through fcPrintfStopIt (); when
 fewer bytes than requested are read, "short read" is printed on stdout and
 the buffer is returned anyway.
**/
double *
ReadResults (char *file, int n, int m)
{
  int nbytes = sizeof (double) * m * n;
  double *buffer = malloc (nbytes);

  int fd = open (file, O_RDONLY);
  if (fd < 0)
    fcPrintfStopIt ("cannot open rawresults:%s\n", file);

  int got = read (fd, buffer, nbytes);
  if (got != nbytes)
    printf ("short read\n");

  close (fd);
  return buffer;
}

//comparison function to qsort
//order: fit
int
resCompare (const void *a, const void *b)
{
  if ((*((RESULTVALUES **) a))->fit < (*((RESULTVALUES **) b))->fit)
    return -1;
  else if ((*((RESULTVALUES **) a))->fit > (*((RESULTVALUES **) b))->fit)
    return 1;
  return 0;
}

/*
 * Index of the result with the smallest fit; the earliest one wins on
 * ties.  Returns 0 when there are fewer than two results.
 */
int
searchMinFit (ALLRESULTS *all)
{
  int best = 0;
  int k = 1;

  while (k < all->nresults)
    {
      if (all->results[k]->fit < all->results[best]->fit)
	best = k;
      k++;
    }
  return best;
}

/*
 * Index of the result with the largest fit; the earliest one wins on ties.
 * Returns 0 when there are fewer than two results.
 */
int
searchMaxFit (ALLRESULTS *all)
{
  int worst = 0;
  int k = 1;

  while (k < all->nresults)
    {
      if (all->results[k]->fit > all->results[worst]->fit)
	worst = k;
      k++;
    }
  return worst;
}

/*
 * Largest `distance` stored among the results.  The first result is read
 * unconditionally, so the set must hold at least one entry.
 */
double
searchMaxDist (ALLRESULTS *all)
{
  double farthest = all->results[0]->distance;
  int k = 1;

  while (k < all->nresults)
    {
      double dk = all->results[k]->distance;
      if (dk > farthest)
	farthest = dk;
      k++;
    }
  return farthest;
}


/*
 * Fill in `distance` for every result: the Euclidean distance, in
 * coefficient space, between that result and the one with the smallest fit
 * (see searchMinFit ()).  Returns `all`.
 */
ALLRESULTS *
calcDist (ALLRESULTS *all)
{
  int best = searchMinFit (all);

  for (int r = 0; r < all->nresults; r++)
    {
      RESULTVALUES *current = all->results[r];
      double sumSquares = 0;

      for (int c = 0; c < all->ncoefs; c++)
	{
	  double gap = current->coefs[c] - all->results[best]->coefs[c];
	  sumSquares += pow (gap, 2.0);
	}
      current->distance = sqrt (sumSquares);
    }
  return all;
}

/*
 * Sort the result pointers in place by ascending fit, using qsort () with
 * resCompare.  Returns `all`.
 */
ALLRESULTS *
sortAll (ALLRESULTS *all)
{
  qsort (all->results, all->nresults, sizeof (RESULTVALUES *), resCompare);

  return all;
}

//comparison function to qsort
//order: distance
int
resCompareD (const void *a, const void *b)
{
  if ((*((RESULTVALUES **) a))->distance < (*((RESULTVALUES **) b))->distance)
    return -1;
  else if ((*((RESULTVALUES **) a))->distance >
	   (*((RESULTVALUES **) b))->distance)
    return 1;
  return 0;
}


/*
 * Sort the result pointers in place by ascending distance, using qsort ()
 * with resCompareD.  Returns `all`.
 */
ALLRESULTS *
sortAllD (ALLRESULTS *all)
{
  qsort (all->results, all->nresults, sizeof (RESULTVALUES *), resCompareD);	//order by distance
  return all;
}

/*
 * Remove result number `n` from `all`, keeping the order of the others.
 *
 * The entry's coefs and values arrays and the RESULTVALUES structure itself
 * are freed (the structures are allocated one by one with malloc () in
 * loadNewResults () and loadIntermediateResults ()).  An index outside
 * 0 .. nresults-1 leaves the set untouched.
 *
 * When the last entry is removed the pointer array is freed and set to
 * NULL rather than calling realloc () with size 0.  If shrinking fails the
 * old, larger array is simply kept.
 *
 * Returns `all`.
 */
ALLRESULTS *
deleteOneResult (ALLRESULTS *all, int n)
{
  if (all == NULL || n < 0 || n >= all->nresults)
    return all;

  RESULTVALUES *victim = all->results[n];
  int tail = all->nresults - n - 1;	// entries after the removed one

  if (tail > 0)
    memmove (&all->results[n], &all->results[n + 1],
	     sizeof (RESULTVALUES *) * tail);
  all->nresults--;

  if (all->nresults == 0)
    {
      free (all->results);
      all->results = NULL;
    }
  else
    {
      RESULTVALUES **shrunk =
	realloc (all->results, sizeof (RESULTVALUES *) * all->nresults);
      if (shrunk != NULL)
	all->results = shrunk;
    }

  free (victim->coefs);
  free (victim->values);
  free (victim);
  return all;
}

/*
 * Thin out the result set so that each distance band keeps one result.
 *
 * Steps:
 *  1. distances to the best-fit result are computed (calcDist);
 *  2. the range 0 .. maximum distance is cut into all->limit bands of equal
 *     width;
 *  3. results are sorted by fit, so the first result met inside a band is
 *     the one with the best fit there;
 *  4. for every band [lo, hi) all results but that first one are removed
 *     with deleteOneResult ().  The upper bound is exclusive, so a result
 *     lying exactly on the maximum distance is never removed;
 *  5. the survivors are sorted by distance.
 *
 * Returns `all`.
 */
ALLRESULTS *
applyPercentile (ALLRESULTS *all)
{
  all = calcDist (all);
  double farthest = searchMaxDist (all);
  double width = farthest / all->limit;	// limit = number of bands
  all = sortAll (all);

  for (int band = 0; band < all->limit; band++)
    {
      double lo = width * band;
      double hi = width * (band + 1);
      int kept = 0;		// set once this band has its representative
      int k = 0;

      while (k < all->nresults)
	{
	  double dk = all->results[k]->distance;
	  int inside = (lo <= dk && dk < hi);

	  if (inside && kept)
	    {
	      // the next entry slides into slot k: do not advance
	      all = deleteOneResult (all, k);
	      continue;
	    }
	  if (inside)
	    kept = 1;
	  k++;
	}
    }

  return sortAllD (all);
}

// Return a malloc'ed copy of the `size` bytes starting at `or`.
// The caller owns the copy and must free it; NULL means malloc failed.

void *
memdup (void *or, size_t size)
{
  void *copy = malloc (size);

  if (copy != NULL)
    memcpy (copy, or, size);
  return copy;
}

/*
 * Gather the outcome of one run into a newly allocated ALLRESULTS.
 *
 *   nindv         number of individuals (entries created; also stored as
 *                 the initial `limit`)
 *   nvalues       number of values per individual
 *   rc            supplies ncoefs and the coefficient names (not copied)
 *   coefficients  one coefficient vector per individual; the pointers are
 *                 stored as they are, not copied
 *   results       flat vector of nindv * nvalues doubles; every individual
 *                 receives its own copy of its slice
 *   fits          one fit per individual
 *   rv            reference values (pointer stored, not copied)
 *
 * A negative fit is reported through fcPrintfStopIt ().  The fit is printed
 * with a floating-point conversion; it is a double everywhere else in this
 * file.
 */
ALLRESULTS *
loadNewResults (int nindv, int nvalues,
		RANGECOEFS *rc, double **coefficients,
		double *results, double *fits, REFERENCEVALUES **rv)
{

  ALLRESULTS *all;
  all = (ALLRESULTS *) malloc (sizeof (ALLRESULTS));
  all->results = (RESULTVALUES **) malloc (sizeof (RESULTVALUES *) * nindv);
  //fill up here coefficients values, results values and distance from best
  all->ncoefs = rc->ncoefs;
  all->nvalues = nvalues;
  all->nindiv = nindv;
  all->nresults = nindv;
  all->limit = nindv;
  all->coefsnames = rc->names;
  all->references = rv;
  for (int i = 0; i < all->nindiv; i++)
    {
      all->results[i] = (RESULTVALUES *) malloc (sizeof (RESULTVALUES));
      all->results[i]->fit = fits[i];
      if (all->results[i]->fit < 0)
	{
	  fcPrintfStopIt ("error:%lf\n", all->results[i]->fit);
	}
      all->results[i]->distance = 0;	// filled in later by calcDist ()
      all->results[i]->coefs = coefficients[i];
      //split and copy one dimension vector into slices
      all->results[i]->values =
	memdup (results + (i * nvalues), nvalues * sizeof (double));
    }

  return all;
}

/*
 * Print a horizontal rule: the PLINE segment `n` times, followed by PLIND
 * once.
 */
void
pline (FILE *f, int n)
{
  for (int left = n; left > 0; left--)
    fprintf (f, PLINE);
  fprintf (f, PLIND);
}

/* Print the string `h` to `f` using the PHEAD format. */
void
phead (FILE *f, char *h)
{
  fprintf (f, PHEAD, h);
}

/*
 * Print `h` inside a field of `n` characters using the PNHEAD format, with
 * PSPACE padding on both sides.
 *
 * The left padding is half of the free space, rounded down; the right
 * padding is what remains after subtracting the left padding, the text and
 * the length PNHEAD produces on its own with empty arguments.
 *
 * NOTE(review): strlen () is unsigned, so the subtractions are done in
 * unsigned arithmetic; presumably n is always at least strlen (h) plus the
 * fixed part of PNHEAD -- confirm against the callers.
 */
void
pnhead (FILE *f, char *h, int n)
{
  double l = (n - strlen (h)) / 2.0;
  int a = floor (l);		// left padding
  char buffer[MAXSTR];
  // expand the format with nothing in it to measure its fixed characters
  snprintf (buffer, MAXSTR, PNHEAD, 0, "", 0, "", 0, "");
  int c = strlen (buffer);
  int b = n - a - strlen (h) - c;	// right padding
  fprintf (f, PNHEAD, a, PSPACE, (int) strlen (h), h, b, PSPACE);
}

/*
 * Print SIMPLE_BAR once, followed by PEMPTY n - 1 times (nothing more when
 * n is 1 or less).
 */
void
pempty (FILE *f, int n)
{
  int i = 1;

  fprintf (f, SIMPLE_BAR);
  while (i < n)
    {
      fprintf (f, PEMPTY);
      i++;
    }
}

/*
 * Print SIMPLE_BAR once, followed by n - 1 empty header cells (phead with
 * an empty string); nothing more when n is 1 or less.
 */
void
pbars (FILE *f, int n)
{
  int i = 1;

  fprintf (f, SIMPLE_BAR);
  while (i < n)
    {
      phead (f, "");
      i++;
    }
}


/* Print the PEND text to `f`; the callers use it to finish a table row. */
void
pend (FILE *f)
{
  fprintf (f, PEND);
}

/* Print the integer `n` to `f` using the PINT format. */
void
pint (FILE *f, int n)
{
  fprintf (f, PINT, n);
}

/* Print the double `d` to `f` using the PDOUBLE format. */
void
pdouble (FILE *f, double d)
{
  fprintf (f, PDOUBLE, d);
}


/*
 * Print the whole result set as a text table on `f`.
 *
 *   nodups    when non-zero, a row whose fit and distance both equal those
 *             of the previously printed row is skipped
 *   yeslimit  when non-zero, printing stops once the row counter reaches
 *             all->limit
 *
 * Layout: a title row ("coefficients" / "reference values"), a row with the
 * reference names, a row with the reference values, a row with the column
 * names ("#", "fit", "distance", coefficient names) followed by the
 * reference weights, and then one row per result: sequence number, fit,
 * distance, coefficients, values.
 *
 * When `all` is NULL, "empty set" is printed on stdout (not on `f`).
 */
void
printAllResults (FILE *f, ALLRESULTS *all, int nodups, int yeslimit)
{
  char buffer[MAXSTR];
  if (all != NULL)
    {
      pline (f, 3 + all->ncoefs + all->nvalues);
      pempty (f, 3);
      snprintf (buffer, MAXSTR, PHEADER, "coefficients", all->ncoefs);
      pnhead (f, buffer, strlen (PLINE) * all->ncoefs);
      snprintf (buffer, MAXSTR, PHEADER, "reference values", all->nvalues);
      pnhead (f, buffer, strlen (PLINE) * all->nvalues);
      pend (f);

      pline (f, 3 + all->ncoefs + all->nvalues);
      // NOTE(review): the count 8 is fixed while every other width here is
      // derived from ncoefs -- presumably it should follow ncoefs; confirm.
      pbars (f, 8);

      for (int i = 0; i < all->nvalues; i++)
	phead (f, all->references[i]->name);

      pend (f);

      pbars (f, 8);
      for (int i = 0; i < all->nvalues; i++)
	pdouble (f, all->references[i]->value);
      pend (f);

      phead (f, "#");
      phead (f, "fit");
      phead (f, "distance");
      for (int i = 0; i < all->ncoefs; i++)
	phead (f, all->coefsnames[i]);
      for (int i = 0; i < all->nvalues; i++)
	pdouble (f, all->references[i]->weight);
      pend (f);

      pline (f, 3 + all->ncoefs + all->nvalues);
      // fit and distance of the last printed row, for duplicate detection
      double afit, adistance;
      int sequence = 0;
      adistance = afit = -1.0;
      for (int i = 0; i < all->nresults; i++)
	{
	  //printf ("check: %lg %lg <-%d-> %lg %lg difs [ %lg %lg ]\n", afit,
	  //        adistance, i, all->results[i].fit, all->results[i].distance,
	  //        afit - all->results[i].fit,
	  //        adistance - all->results[i].distance);
	  if (nodups && i != 0
	      && afit ==
	      all->results[i]->fit && all->results[i]->distance == adistance)
	    {			//skip duplicates
	      //printf ("skip %d fit:%lg d:%lg\n ", i, afit, adistance);
	      continue;
	    }

	  sequence++;
	  // NOTE(review): sequence is incremented before this test, so at
	  // most limit - 1 rows are printed -- confirm that is intended.
	  if (yeslimit && sequence >= all->limit)
	    break;		//print no more than limit
	  pint (f, sequence);
	  pdouble (f, all->results[i]->fit);
	  pdouble (f, all->results[i]->distance);
	  for (int j = 0; j < all->ncoefs; j++)
	    pdouble (f, all->results[i]->coefs[j]);
	  for (int j = 0; j < all->nvalues; j++)
	    pdouble (f, all->results[i]->values[j]);
	  pend (f);

	  adistance = all->results[i]->distance;
	  afit = all->results[i]->fit;
	}
      pline (f, 3 + all->ncoefs + all->nvalues);
    }
  else
    {
      printf ("empty set\n");
    }
}

/*
 * Write the result table to the text file `file`, creating or truncating
 * it.  The table is printed by printAllResults () with the NODUPS and
 * YESLIMIT arguments.  A file that cannot be opened is reported through
 * fcPrintfStopIt ().
 */
void
WriteTextResults (char *file, ALLRESULTS *all)
{
  FILE *out = fopen (file, "w");

  if (!out)
    fcPrintfStopIt ("cannot open %s\n", file);
  printAllResults (out, all, NODUPS, YESLIMIT);
  fclose (out);
}


/*
 * Scan an integer from the start of `s` using the PINT format and store it
 * in *i.
 *
 * Returns a pointer to the first '|' found after the first character of
 * `s`, or NULL when there is none.  An empty string returns NULL directly,
 * so the search never starts beyond the terminator.
 *
 * strchr () replaces the legacy index (), which is declared in <strings.h>
 * (not included by this file) and has the same behaviour.
 */
char *
getLineInt (char *s, int *i)
{
  sscanf (s, PINT, i);
  if (*s == '\0')
    return NULL;
  return strchr (s + 1, '|');
}

/*
 * Scan a double from the start of `s` using the PDOUBLE format and store it
 * in *d.
 *
 * Returns a pointer to the first '|' found after the first character of
 * `s`, or NULL when there is none.  An empty string returns NULL directly,
 * so the search never starts beyond the terminator.
 *
 * strchr () replaces the legacy index (), which is declared in <strings.h>
 * (not included by this file) and has the same behaviour.
 */
char *
getLineDouble (char *s, double *d)
{
  sscanf (s, PDOUBLE, d);
  if (*s == '\0')
    return NULL;
  return strchr (s + 1, '|');
}

//read from text output; not needed: inaccurate due to limited decimal precision.
/*
ALLRESULTS *scanOldTextResults (FILE * f, ALLRESULTS * all)
{
  char *l;
  char *first;
  int counter;
  int isFirst;

  isFirst = 1;

  while (l = InputFileLine (f))
    {
      if (isFirst == 1)
	{
	  first = l;
	  isFirst = 0;
	  counter = 0;
	}
      else
	{
	  if (strcmp (l, first) != 0)
	    {
	      if (counter > 1)
		{
		  int n;
		  char *bar = l;
		  double d;
		  all->nresults++;
		  all->results =
		    (RESULTVALUES *) realloc (all->results,
					      sizeof (RESULTVALUES) *
					      all->nresults);
		  bar = getLineInt (bar, &n);
		  printf ("n->%d ", n);
		  bar = getLineDouble (bar, &d);
		  printf ("fit->%lg ", d);
		  all->results[all->nresults - 1].fit = d;

		  bar = getLineDouble (bar, &d);
		  printf ("distance->%lg ", d);
		  all->results[all->nresults - 1].distance = 0;

		  double *coefs = malloc (sizeof (double) * all->ncoefs);
		  for (int i = 0; i < all->ncoefs; i++)
		    {
		      bar = getLineDouble (bar, &d);
		      printf ("%s=%lg ", all->coefsnames[i], d);
		      coefs[i] = d;
		    }
		  all->results[all->nresults - 1].coefs = coefs;

		  double *values = malloc (sizeof (double) * all->nvalues);
		  for (int i = 0; i < all->nvalues; i++)
		    {
		      bar = getLineDouble (bar, &d);
		      printf ("%s=%lg ", all->references[i]->name, d);
		      values[i] = d;
		    }
		  all->results[all->nresults - 1].values = values;
		  printf ("\n");
		}
	    }
	  else
	    {
	      counter++;
	    }
	  free (l);
	  if (counter > 2)
	    break;
	}
    }

  return all;
}
*/

/*
 * Read one raw (binary) double from `f` into *what.
 * Returns the fread () item count: 1 on success, 0 otherwise.
 */
int
getrDouble (FILE *f, double *what)
{
  size_t got = fread (what, sizeof *what, 1, f);

  return (int) got;
}

/*
 * Read one raw (binary) int from `f` into *what.
 * Returns the fread () item count: 1 on success, 0 otherwise.
 */
int
getrInt (FILE *f, int *what)
{
  size_t got = fread (what, sizeof *what, 1, f);

  return (int) got;
}

/*
 * Write `what` to `f` as one raw (binary) double.
 * Returns the fwrite () item count: 1 on success, 0 otherwise.
 */
int
savrDouble (FILE *f, double what)
{
  size_t put = fwrite (&what, sizeof what, 1, f);

  return (int) put;
}

/*
 * Write `what` to `f` as one raw (binary) int.
 * Returns the fwrite () item count: 1 on success, 0 otherwise.
 */
int
savrInt (FILE *f, int what)
{
  size_t put = fwrite (&what, sizeof what, 1, f);

  return (int) put;
}

/*
 * Append to `all` the records stored in the binary stream `f` (the layout
 * written by saveIntermediateResults ()): for each record an int sequence
 * number, the fit, the distance, all->ncoefs coefficients and all->nvalues
 * values.
 *
 * Reading stops at end of file, at the first record whose sequence number
 * is not the expected 1, 2, 3, ... (reported on stdout), or at the first
 * incomplete record.  A record is appended only after it has been read
 * completely, so a truncated file never adds an entry with undefined
 * contents.  A negative fit is reported through fcStopIt ().
 *
 * The stored distance is loaded as it is; it must be calculated again.
 *
 * Returns `all`.
 */
ALLRESULTS *
loadIntermediateResults (FILE *f, ALLRESULTS *all)
{
  int sequence = 0;
  int e;

  // Loop on the read itself: feof () only becomes true after a read has
  // already failed, and it never becomes true on a read error.
  while (getrInt (f, &e) == 1)
    {
      double fit = 0.0;
      double distance = 0.0;
      double *coefs = NULL;
      double *values = NULL;
      RESULTVALUES *rec = NULL;
      int complete;

      sequence++;
      if (e != sequence)
	{
	  printf ("out sequence: %d != %d\n", e, sequence);
	  break;
	}

      complete = (getrDouble (f, &fit) == 1);
      if (complete && fit < 0)
	fcStopIt ("negative fit");
      complete = complete && (getrDouble (f, &distance) == 1);

      if (complete)
	{
	  coefs = malloc (sizeof (double) * all->ncoefs);
	  values = malloc (sizeof (double) * all->nvalues);
	  rec = malloc (sizeof (RESULTVALUES));
	  // malloc (0) may legitimately return NULL, hence the count tests
	  if (rec == NULL
	      || (all->ncoefs > 0 && coefs == NULL)
	      || (all->nvalues > 0 && values == NULL))
	    {
	      fcStopIt ("out of memory");
	      complete = 0;
	    }
	}
      for (int i = 0; complete && i < all->ncoefs; i++)
	complete = (getrDouble (f, &coefs[i]) == 1);
      for (int i = 0; complete && i < all->nvalues; i++)
	complete = (getrDouble (f, &values[i]) == 1);

      RESULTVALUES **grown = NULL;
      if (complete)
	{
	  grown = realloc (all->results,
			   sizeof (RESULTVALUES *) * (all->nresults + 1));
	  if (grown == NULL)
	    fcStopIt ("out of memory");
	}
      if (grown == NULL)
	{
	  // truncated record or no memory: drop it and stop reading
	  if (!complete)
	    printf ("incomplete record: %d\n", sequence);
	  free (coefs);
	  free (values);
	  free (rec);
	  break;
	}

      rec->fit = fit;
      rec->distance = distance;	//must be calculated again
      rec->coefs = coefs;
      rec->values = values;
      all->results = grown;
      all->results[all->nresults] = rec;
      all->nresults++;
    }
  return all;
}

/*
 * Write every result of `all` to the binary stream `f`.
 *
 * Each record consists of a sequence number starting at 1 (int), the fit,
 * the distance, all->ncoefs coefficients and all->nvalues values, written
 * with savrInt ()/savrDouble ().  The return values of those calls are not
 * examined.
 */
void
saveIntermediateResults (FILE *f, ALLRESULTS *all)
{
  for (int n = 0; n < all->nresults; n++)
    {
      RESULTVALUES *rec = all->results[n];

      savrInt (f, n + 1);	// sequence numbers are 1-based
      savrDouble (f, rec->fit);
      savrDouble (f, rec->distance);

      for (int c = 0; c < all->ncoefs; c++)
	savrDouble (f, rec->coefs[c]);

      for (int v = 0; v < all->nvalues; v++)
	savrDouble (f, rec->values[v]);
    }
}

/*
 * Create or truncate `file` and store all results in it through
 * saveIntermediateResults ().  A file that cannot be opened is reported
 * through fcPrintfStopIt ().
 */
void
WriteIntermediateResults (char *file, ALLRESULTS *all)
{
  FILE *out = fopen (file, "w");

  if (!out)
    fcPrintfStopIt ("cannot open %s\n", file);
  saveIntermediateResults (out, all);
  fclose (out);
}


/*
 * Open `file` and append the records it holds to `all` through
 * loadIntermediateResults ().  A file that cannot be opened is reported
 * through fcPrintfStopIt ().  Returns the set handed back by the loader.
 */
ALLRESULTS *
ReadIntermediateResults (char *file, ALLRESULTS *all)
{
  FILE *in = fopen (file, "r");

  if (!in)
    fcPrintfStopIt ("cannot open -%s-\n", file);

  ALLRESULTS *loaded = loadIntermediateResults (in, all);
  fclose (in);
  return loaded;
}

/*
 * Create `file` empty, truncating it if it already exists, and set its
 * permissions to rw-r--r--.
 *
 * open () is given the mode argument that O_CREAT requires and a write
 * access mode, since O_TRUNC is only specified for files opened for
 * writing.  chmod () is still called afterwards because the mode passed to
 * open () is filtered by the process umask and is not applied to a file
 * that already existed.
 *
 * A failure to create the file is reported through fcPrintfStopIt ().
 * Returns `file`.
 */
char *
newFile (char *file)
{
  const mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
  int fn = open (file, O_WRONLY | O_CREAT | O_TRUNC, mode);

  if (fn < 0)
    fcPrintfStopIt ("cannot open:%s\n", file);
  else
    close (fn);
  chmod (file, mode);
  return file;
}
