/*
Copyright 2018 Andrew R. Booker, Min Lee and Andreas Strömbergsson

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <acb.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

// Build-time switches (both disabled here). In the code visible in this
// part of the file they only select the value of Nmax below.
//#define GALOIS_ONLY
//#define GAMMA_ONLY
// working precision handed to the arb/acb calls in this file
#define prec 100
// dimension of the square matrix built and inverted in gauss()
#define points 200
// number of entries in the s1/s2 accumulator arrays of trace_t
#define ntest (2*points-1)
// Nmax sizes the per-level tables in trace_t and is the minimum size of
// the factor table built by factor_table()
#if defined(GAMMA_ONLY)
#define Nmax 51530
#elif defined(GALOIS_ONLY)
#define Nmax 3001
#else
#define Nmax 1001
#endif
// capacity of the per-prime arrays (distinct prime factors of the level)
#define nPmax 8

// support bound of the test function; read by parabolic(), hyperbolic()
// and outside_support(), assigned outside the code visible here
static int gsupp;
// scaling constants; presumably deltainv = 1/delta -- they are
// initialised outside the code visible here, confirm at the init site
static arb_t delta,deltainv;
// shared constants, also initialised elsewhere. From their use:
// zeroone/onetwo are compared with arb_overlaps in g1() (presumably the
// intervals [0,1] and [1,2]); the rest are presumably the values their
// names suggest -- TODO confirm
static arb_t zero,one,negone,zeroone,onetwo,ln2,logpi,euler;

/* One Dirichlet character, stored prime by prime (filled in char_recurse):
 *   cond    - product over i of P[i]^s[i]
 *   num_odd - number of indices i for which a[i] is odd
 *   a[i]    - exponent describing the character at the i-th prime; chi_p
 *             multiplies it with a discrete logarithm
 *   s[i]    - exponent of the i-th prime in cond
 */
struct chi_struct {
	int cond;
	int num_odd;
	int a[nPmax];
	int s[nPmax];
};
/* Per-level state, built by trace_init() and released by trace_clear(). */
typedef struct {
	int N;                          /* the level */
	int nP;                         /* number of distinct primes dividing N */
	int nchar;                      /* number of entries used in allchi */
	int P[nPmax];                   /* the primes dividing N */
	int e[nPmax];                   /* exponent of P[i] in N */
	int pe[nPmax];                  /* P[i]^e[i] */
	int phipe[nPmax];               /* pe[i]-pe[i]/P[i] */
	int sqrtmod4pe[nPmax][4*Nmax];  /* [i][x]: least m in [0,pe[i]] with
	                                   m*m == x mod 4*pe[i], else -1 */
	int discretelog[nPmax][Nmax];   /* [i][m]: discrete log of the unit m
	                                   modulo pe[i] */
	struct chi_struct allchi[Nmax]; /* characters kept by char_recurse */
	struct chi_struct *chi;         /* current character; not assigned in
	                                   trace_init */
	arb_t s1[ntest];                /* accumulators filled by record*() */
	arb_t s2[ntest];
} trace_t;

#include "dihedral.c"
#include "exotic.c"
#include "quad.c"

// Euclid's algorithm: greatest common divisor of x and y.
// gcd(0,y) is y and gcd(x,0) is x.
static int gcd(int x,int y) {
	while (x != 0) {
		int rem = y % x;
		y = x;
		x = rem;
	}
	return y;
}

// x*y mod p, with the product formed in a long so that it does not
// overflow int
static inline int modmul(int x,int y,int p) {
	long product = (long)x*y;
	return (int)(product % p);
}

// x^n mod p by binary exponentiation; returns 1 for n <= 0
static int modpow(int x,int n,int p) {
	int acc;

	for (acc=1;n>0;n>>=1) {
		// multiply in the current square whenever the low bit of n is set
		if (n % 2 == 1)
			acc = modmul(acc,x,p);
		x = modmul(x,x,p);
	}
	return acc;
}

// values of the symbol at p = 2, indexed by d mod 8
static int chi8[] = {0,1,0,-1,0,-1,0,1};
// Quadratic symbol (d/p): table lookup for p = 2, otherwise Euler's
// criterion d^((p-1)/2) mod p. Returns 0 when p divides d.
static int kronecker(long int d,int p) {
	int r;

	if (p == 2)
		return chi8[d & 7];
	r = d % p;
	if (r == 0)
		return 0;
	if (r < 0)
		r += p;
	return (modpow(r,(p-1)>>1,p) == 1) ? 1 : -1;
}

// Return the smallest g in [1,n) that is coprime to n and has
// multiplicative order exactly ord modulo n, or 0 when there is none.
// (If p is an odd prime < 40487 then the smallest primitive root
//  mod p is also a primitive root mod p^2.)
static int generator(int n,int ord) {
	int cand;

	for (cand=1;cand<n;cand++) {
		int pw,len;

		if (gcd(cand,n) != 1)
			continue;
		// len = multiplicative order of cand modulo n
		pw = cand, len = 1;
		while (pw != 1) {
			pw = modmul(pw,cand,n);
			len++;
		}
		if (len == ord)
			return cand;
	}
	return 0;
}

// Square root of a modulo p (Tonelli-Shanks iteration).
// Returns 0 when p divides a, -1 when Euler's criterion
// a^(p>>1) mod p != 1 rejects a, and otherwise a value r with
// r*r == a (mod p).
// NOTE(review): the only caller visible here passes primes p = 1 mod 4;
// other moduli have not been checked.
static int sqrtmodp(int a,int p) {
	int r,n,m,k,kp,x,b,c,z;
	a %= p;
	if (!a) return 0;
	if (a < 0) a += p;
	if (modpow(a,p>>1,p) != 1) return -1;
	// write p-1 = m*2^k with m odd
	for (m=p-1,k=0;(m&1)==0;m>>=1,k++);
	// first guess r = a^((m+1)/2); then r*r = a*n with n = a^m
	r = modpow(a,(m+1)>>1,p);
	n = modpow(a,m,p);
	if (n == 1) return r;
	// z = smallest value >= 2 failing Euler's criterion; c = z^m
	for (z=2;modpow(z,p>>1,p)==1;z++);
	c = modpow(z,m,p);
	do {
		// kp = number of squarings that take n to 1
		for (kp=0,x=n;x!=1;x=modmul(x,x,p),kp++);
		b = modpow(c,1<<(k-kp-1),p);
		// correct r by b and n by b^2, keeping r*r = a*n
		r = modmul(r,b,p), c = modmul(b,b,p), n = modmul(n,c,p);
		k = kp;
	} while (n > 1);
	return r;
}

// p^e as a long, by repeated multiplication.
// Any e <= 0 yields 1; the previous loop only handled e == 0 and kept
// decrementing a negative e until it overflowed.
// There is no overflow check: the caller must keep p^e within a long.
static long intpow(int p,int e) {
	long res = 1;

	while (e-- > 0)
		res *= p;
	return res;
}

// p-adic valuation: the number of times p divides n.
// n must be nonzero, otherwise the loop never ends.
static int ordp(long n,int p) {
	int count = 0;

	while (n % p == 0) {
		n /= p;
		count++;
	}
	return count;
}

#define arb_get_d(x) arf_get_d(arb_midref(x),ARF_RND_NEAR)
// y = |x| as a ball; if the resulting enclosure reaches below zero it
// is rebuilt from the endpoints [0,upper]
static void myabs(arb_t y,const arb_t x,long pr) {
	static int ready;
	static arf_t lo,hi;

	if (!ready) {
		arf_init(lo);
		arf_init(hi);
		ready = 1;
	}
	arb_abs(y,x);
	arb_get_interval_arf(lo,hi,y,pr);
	if (arf_sgn(lo) >= 0)
		return;
	// lower endpoint is negative: clip it to zero
	arf_zero(lo);
	arb_set_interval_arf(y,lo,hi,pr);
}

// Interval evaluation of the even piecewise cubic
//   g1(u) = (|u|-2)*|u|^2/2+2/3   on the piece tested against zeroone
//           (2-|u|)^3/6           on the piece tested against onetwo
//           0                     when |u|-2 may be nonnegative
// When the input ball may meet several pieces, res is the union of the
// values of every such piece.
// zeroone and onetwo are globals initialised outside this function;
// presumably the intervals [0,1] and [1,2] -- confirm at the init site.
static void g1(arb_t res,arb_srcptr u) {
	static int init;
	static arb_t u1,u2,t;
	int emptyset;

	if (!init) {
		arb_init(u1); arb_init(u2); arb_init(t);
		init = 1;
	}
	// u1 = |u|, u2 = |u|-2
	myabs(u1,u,prec);
	arb_sub_ui(u2,u1,2,prec);
	emptyset = 1; // res has not been set yet
	if (arb_contains_nonnegative(u2)) {
		arb_zero(res);
		emptyset = 0;
	}
	if (arb_overlaps(u1,onetwo)) {
		arb_mul(t,u2,u2,prec);
		arb_mul(t,t,u2,prec);
		arb_div_si(t,t,-6,prec); // t = (2-u)^3/6
		if (emptyset)
			arb_set(res,t);
		else
			arb_union(res,res,t,prec);
		emptyset = 0;
	}
	if (arb_overlaps(u1,zeroone)) {
		arb_mul(t,u1,u1,prec);
		arb_mul(t,t,u2,prec);
		arb_mul_ui(t,t,3,prec);
		arb_add_ui(t,t,4,prec);
		arb_div_ui(t,t,6,prec); // t = (u-2)*u^2/2+2/3
		if (emptyset)
			arb_set(res,t);
		else
			arb_union(res,res,t,prec);
	}
}

// Polynomial piece of g1 selected by the integer u0, evaluated at the
// complex point u. A negative u0 is reflected (u -> -u, u0 -> -u0-1);
// afterwards u0 = 0 and u0 = 1 pick the two cubic pieces and every other
// u0 gives zero. u is copied before res is written, so res may alias u.
static void g1int(acb_t res,int u0,acb_srcptr u) {
	static int ready;
	static acb_t v,vm2;

	if (!ready) {
		acb_init(v);
		acb_init(vm2);
		ready = 1;
	}
	if (u0 >= 0)
		acb_set(v,u);
	else {
		acb_neg(v,u);
		u0 = -u0-1;
	}

	acb_sub_ui(vm2,v,2,prec); // vm2 = v-2
	switch (u0) {
	case 0:
		// res = (v-2)*v^2/2+2/3
		acb_mul(res,v,v,prec);
		acb_mul(res,res,vm2,prec);
		acb_mul_ui(res,res,3,prec);
		acb_add_ui(res,res,4,prec);
		acb_div_ui(res,res,6,prec);
		break;
	case 1:
		// res = -(v-2)^3/6
		acb_cube(res,vm2,prec);
		acb_div_si(res,res,-6,prec);
		break;
	default:
		acb_zero(res);
		break;
	}
}

// Derivative counterpart of g1int: the derivative of the piece selected
// by u0, evaluated at u. A negative u0 is reflected (u -> -u,
// u0 -> -u0-1) and the result is negated at the end in that case.
// u is copied before res is written, so res may alias u.
static void g1pint(acb_t res,int u0,acb_srcptr u) {
	static int ready;
	static acb_t v;
	int flip = (u0 < 0);

	if (!ready) {
		acb_init(v);
		ready = 1;
	}

	if (flip) {
		acb_neg(v,u);
		u0 = -u0-1;
	} else
		acb_set(v,u);

	switch (u0) {
	case 0:
		// res = (3v-4)*v/2
		acb_mul_ui(res,v,3,prec);
		acb_sub_ui(res,res,4,prec);
		acb_mul(res,res,v,prec);
		acb_mul_2exp_si(res,res,-1);
		break;
	case 1:
		// res = -(v-2)^2/2
		acb_sub_ui(res,v,2,prec);
		acb_mul(res,res,res,prec);
		acb_div_si(res,res,-2,prec);
		break;
	default:
		acb_zero(res);
		break;
	}

	if (flip)
		acb_neg(res,res);
}

// res = deltainv*(g1(dinvx-i)+g1(dinvx+i))/2, evaluated in ball
// arithmetic at the working precision
static void g(arb_t res,int i,arb_srcptr dinvx) {
	static int ready;
	static arb_t shifted;

	if (!ready) {
		arb_init(shifted);
		ready = 1;
	}

	// left translate g1(dinvx-i) goes straight into res
	arb_sub_si(shifted,dinvx,i,prec);
	g1(res,shifted);

	// right translate g1(dinvx+i) is evaluated in place
	arb_add_si(shifted,dinvx,i,prec);
	g1(shifted,shifted);

	arb_add(res,res,shifted,prec);
	arb_mul(res,res,deltainv,prec);
	arb_mul_2exp_si(res,res,-1);
}

// The analytic function that agrees with delta*g(delta*z) for z between
// n and n+1, i.e. (g1(z-i)+g1(z+i))/2 with the pieces of g1 fixed by n.
// The z-i term is formed before res is touched, so res may alias z.
static void gint(acb_t res,int i,int n,acb_srcptr z) {
	static int ready;
	static acb_t left;

	if (!ready) {
		acb_init(left);
		ready = 1;
	}

	// left = g1 piece (n-i) at z-i
	acb_sub_si(left,z,i,prec);
	g1int(left,n-i,left);

	// res = g1 piece (n+i) at z+i
	acb_add_si(res,z,i,prec);
	g1int(res,n+i,res);

	// average the two translates
	acb_add(res,res,left,prec);
	acb_mul_2exp_si(res,res,-1);
}

// the analytic function that agrees with g'(delta*z)/z
// for z between n and n+1
// For n == 0 the quotient is written out as a polynomial for i = 0,1,2
// (and is zero for other i), so no division by z takes place there.
// The value is assembled in the static r and only stored to res at the
// very end, so res may alias z.
static void gpxint(acb_t res,int i,int n,acb_srcptr z) {
	static int init;
	static acb_t r,t;

	if (!init) {
		acb_init(r); acb_init(t);
		init = 1;
	}

	if (!n) {
		if (i == 0) {
			// (3z-4)/2
			acb_mul_ui(r,z,3,prec);
			acb_sub_ui(r,r,4,prec);
			acb_mul_2exp_si(r,r,-1);
		} else if (i == 1) {
			// 1-z
			acb_sub_ui(r,z,1,prec);
			acb_neg(r,r);
		} else if (i == 2) {
			// z/4
			acb_mul_2exp_si(r,z,-2);
		} else
			acb_zero(r);
	} else {
		// (g1p(z-i)+g1p(z+i))/(2z)
		acb_sub_si(t,z,i,prec);
		g1pint(t,n-i,t);
		acb_add_si(r,z,i,prec);
		g1pint(r,n+i,r);
		acb_add(r,r,t,prec);
		acb_div(r,r,z,prec);
		acb_mul_2exp_si(r,r,-1);
	}

	// scale by deltainv twice, i.e. by deltainv^2
	acb_mul_arb(r,r,deltainv,prec);
	acb_mul_arb(res,r,deltainv,prec);
}

// factor[t]: smallest divisor x >= 2 of t with x*x <= t, or 0 when there
// is none (so 0 for primes); allocated and filled by factor_table()
static unsigned short *factor;
// sqfm[t] / sqfp[t]: per-t multipliers built by sqfm_table() and
// sqfp_table() from t*t-4 and t*t+4 respectively; hyperbolic() divides
// t*t-4n by their square to obtain the fundamental discriminant
static int *sqfm,*sqfp;

// Allocate and fill the global table factor[0..T], after raising T to at
// least Nmax. Afterwards factor[t] is the smallest divisor x >= 2 of t
// with x*x <= t, and 0 if there is none (in particular for primes).
// Exits the program if the table cannot be allocated.
static void factor_table(int T) {
	int x,t;

	if (T < Nmax) T = Nmax;
	factor = calloc((size_t)T+1,sizeof(factor[0]));
	if (!factor) {
		perror("factor_table: calloc");
		exit(EXIT_FAILURE);
	}
	// go from large x down to 2 so that the smallest divisor is written last
	for (x=(int)sqrtl((long double)T);x>=2;x--)
		for (t=x*x;t<=T;t+=x)
			factor[t] = (unsigned short)x;
}

// Allocate and fill sqfm[0..T]. For t >= 3, sqfm[t] collects the square
// factors of t*t-4 that hyperbolic() divides out (as sqfm[t]^2) to get
// the fundamental discriminant. Needs factor[] to be built first.
// Exits the program if the table cannot be allocated.
static void sqfm_table(int T) {
	int x,t,p;
	long int d,p2;

	sqfm = malloc(((size_t)T+1)*sizeof(sqfm[0]));
	if (!sqfm) {
		perror("sqfm_table: malloc");
		exit(EXIT_FAILURE);
	}
	for (t=0;t<=T;t++) sqfm[t] = 1;
	// power of 2: t*t-4 is even only for even t
	for (t=4;t<=T;t+=2) {
		d = (long)t*t-4;
		do sqfm[t] <<= 1, d >>= 2; while (!(d & 3));
		// a quotient that is 2 or 3 mod 4 must keep one factor of 4
		if (d & 2) sqfm[t] >>= 1;
	}
	// Odd primes. p*p can only divide t*t-4 = (t-2)*(t+2) when
	// p*p <= t+2, so primes above T never change the table. Stopping at
	// T instead of T+2 gives the same output and keeps the factor[]
	// lookup inside a table built for the same T.
	for (p=3;p<=T;p+=2) {
		if (factor[p]) continue;
		p2 = (long)p*p;
		// visit t = +-2 mod p; p = 3 starts at 4 to avoid t = 2 (d = 0)
		t=p-2,x=4; if (p==3) t=4,x=1;
		for (;t<=T;t+=x,x=p-x) {
			d = (long)t*t-4;
			while (d % p2 == 0) sqfm[t] *= p, d /= p2;
		}
	}
}

// Allocate and fill sqfp[0..T]: the counterpart of sqfm_table for
// t*t+4. hyperbolic() divides t*t+4 by sqfp[t]^2 to get the fundamental
// discriminant. Needs factor[] to be built first.
// Exits the program if the table cannot be allocated.
static void sqfp_table(int T) {
	int x,t,p;
	long int d,p2;

	sqfp = malloc(((size_t)T+1)*sizeof(sqfp[0]));
	if (!sqfp) {
		perror("sqfp_table: malloc");
		exit(EXIT_FAILURE);
	}
	for (t=0;t<=T;t++) sqfp[t] = 1;
	// power of 2: t*t+4 is even only for even t
	for (t=4;t<=T;t+=2) {
		d = (long)t*t+4;
		do sqfp[t] <<= 1, d >>= 2; while (!(d & 3));
		// a quotient that is 2 or 3 mod 4 must keep one factor of 4
		if (d & 2) sqfp[t] >>= 1;
	}
	// Odd primes: only p = 1 mod 4 can divide t*t+4. p*p dividing
	// t*t+4 <= T*T+4 forces p <= T, so stopping at T instead of T+2
	// gives the same output and keeps the factor[] lookup inside a table
	// built for the same T.
	for (p=5;p<=T;p+=4) {
		if (factor[p]) continue;
		p2 = (long)p*p;
		// t = smaller root of t*t = -4 mod p, x = gap to the other root
		t = sqrtmodp(-4,p);
		if ( (x = p-2*t) < 0)
			t = p-t, x = -x;
		for (;t<=T;t+=x,x=p-x) {
			d = (long)t*t+4;
			while (d % p2 == 0) sqfp[t] *= p, d /= p2;
		}
	}
}

// Enumerate characters prime by prime. At depth l the exponent a[l] runs
// over 0..amax; for each accepted value the matching conductor exponent
// s[l] is set and the recursion moves to the next prime. Values of a[l]
// matching none of the cases below are skipped. At depth nP the
// candidate is written to tr->allchi[tr->nchar] and kept (nchar
// incremented) only if an even number of its a[i] are odd.
static void char_recurse(trace_t *tr,int a[],int s[],int l) {
	int i,amax;
	if (l == tr->nP) {
		// complete candidate: fill in the next slot
		tr->allchi[tr->nchar].cond = 1;
		tr->allchi[tr->nchar].num_odd = 0;
		for (i=0;i<tr->nP;i++) {
			tr->allchi[tr->nchar].a[i] = a[i];
			tr->allchi[tr->nchar].s[i] = s[i];
			tr->allchi[tr->nchar].cond *= intpow(tr->P[i],s[i]);
			tr->allchi[tr->nchar].num_odd += (a[i] & 1);
		}
		/* only keep even characters */
		// (a rejected candidate is overwritten by the next one)
		if (!(tr->allchi[tr->nchar].num_odd & 1)) tr->nchar++;
	} else {
		amax = tr->phipe[l]/2;
		if (tr->P[l] == 2 && tr->e[l] > 2) amax++;
		for (a[l]=0;a[l]<=amax;a[l]++) {
			if (tr->P[l] == 2) {
				// p = 2: s[l] is one of 0, 2, e/2 or e
				if (!a[l] && ((tr->e[l] & 1) || tr->e[l] == 2))
					s[l] = 0;
				else if (a[l] == 1 &&
				((tr->e[l] & 1) && tr->e[l] >= 5 || tr->e[l] == 2 || tr->e[l] == 4))
					s[l] = 2;
				else if (a[l]>>1 == 1<<(tr->e[l]-tr->e[l]/2))
					s[l] = tr->e[l]/2;
				else if (a[l] & 2)
					s[l] = tr->e[l];
				else
					continue;
			} else {
				// odd p: s[l] is 0 for a = 0, 1 when a equals the odd part
				// of phipe, and e when p does not divide a
				if (!a[l])
					s[l] = 0;
				else if (a[l] == tr->phipe[l]>>ordp(tr->phipe[l],2))
					s[l] = 1;
				else if (a[l] % tr->P[l])
					s[l] = tr->e[l];
				else
					continue;
			}
			char_recurse(tr,a,s,l+1);
		}
	}
}

// number of a[] entries compared by chicmp; set before calling qsort
static int chicmp_nP;
// qsort comparator for struct chi_struct: order by cond first, then by
// the exponents a[0..chicmp_nP-1] from left to right
static int chicmp(const void *X,const void *Y) {
	const struct chi_struct *lhs = (struct chi_struct *)X;
	const struct chi_struct *rhs = (struct chi_struct *)Y;
	int k,diff;

	diff = lhs->cond-rhs->cond;
	if (diff)
		return diff;
	for (k=0;k<chicmp_nP;k++) {
		diff = lhs->a[k]-rhs->a[k];
		if (diff)
			return diff;
	}
	return 0;
}

/* Compute even Dirichlet characters mod N and related structures.
 *
 * Allocates a trace_t, factors N with the global factor[] table (which
 * must already cover N), fills the per-prime tables pe, phipe,
 * discretelog and sqrtmod4pe, enumerates and sorts the characters into
 * allchi, and initialises the s1/s2 accumulators. tr->chi is left unset.
 * Release the result with trace_clear(). Exits the program if the
 * (large) structure cannot be allocated.
 */
static trace_t *trace_init(int N) {
	trace_t *tr;
	int i,m,g,n,p,a[nPmax],s[nPmax];

	tr = malloc(sizeof(*tr));
	if (!tr) {
		perror("trace_init: malloc");
		exit(EXIT_FAILURE);
	}
	/* factor N; factor[m] == 0 means m itself is prime */
	m = tr->N = N; tr->nP = 0;
	while (m > 1) {
		if ( !(p = factor[m]) ) p = m;
		tr->e[tr->nP] = 0;
		do m /= p, tr->e[tr->nP]++;  while (m % p == 0);
		tr->P[tr->nP++] = p;
	}

	for (i=0;i<tr->nP;i++) {
		tr->pe[i] = intpow(tr->P[i],tr->e[i]);
		tr->phipe[i] = tr->pe[i]-tr->pe[i]/tr->P[i];
		/* for 2^e with e > 2 use 5 as base; otherwise find an element of
		   order phi(p^e) */
		g = (tr->P[i] == 2 && tr->e[i] > 2) ? 5 : generator(tr->pe[i],tr->phipe[i]);
		/* walk the powers of g, recording their exponents */
		m = 1, n = 0;
		do {
			tr->discretelog[i][m] = n;
			/* for 2^e, e > 2: the residue -m gets the same exponent
			   shifted by 2^(e-2) */
			if (tr->P[i] == 2 && tr->e[i] > 2)
				tr->discretelog[i][tr->pe[i]-m] = n+(1<<(tr->e[i]-2));
			n++;
			m = (long)m*g%tr->pe[i];
		} while (m != 1);

		/* sqrtmod4pe[i][x] = least m in [0,pe] with m*m == x mod 4*pe,
		   or -1; filling downwards leaves the least m in place */
		for (m=0;m<4*tr->pe[i];m++)
			tr->sqrtmod4pe[i][m] = -1;
		for (m=tr->pe[i];m>=0;m--)
			tr->sqrtmod4pe[i][(long)m*m%(4*tr->pe[i])] = m;
	}

	tr->nchar = 0;
	char_recurse(tr,a,s,0);
	chicmp_nP = tr->nP;
	qsort(tr->allchi,tr->nchar,sizeof(tr->allchi[0]),chicmp);
	for (i=0;i<ntest;i++) {
		arb_init(tr->s1[i]);
		arb_init(tr->s2[i]);
	}
	return tr;
}

// Release everything trace_init() set up: clear both accumulator arrays,
// then free the structure itself.
static void trace_clear(trace_t *tr) {
	int k = 0;

	while (k < ntest) {
		arb_clear(tr->s1[k]);
		arb_clear(tr->s2[k]);
		k++;
	}
	free(tr);
}

// integrand signature: f(res,i,n,z) evaluates the piece valid between
// n and n+1, with parameter i, at the complex point z
typedef void (*function)(acb_t,int,int,acb_srcptr);
// integral of f (with parameter i) from m to m+1
// f is assumed to be analytic on the closed unit disc around m+1/2
// The sum uses the nodes tanh(sinh(k*h)) on [-1,1], mapped to [m,m+1].
// A truncation bound, built from the size of f on the unit circle around
// m+1/2, is added to the result as a symmetric interval.
// f is called as f(z,i,m,z), so it must tolerate res aliasing its input.
static void integral(arb_t res,function f,int i,int m) {
	static int init;
	static arb_t h,x,a,s,c;
	static acb_t z;
	int k,n;

	if (!init) {
		arb_init(h); arb_init(x); arb_init(a);
		arb_init(s); arb_init(c);
		acb_init(z);
		init = 1;
	}

	// choose the number of steps so that the error is roughly 2^-prec
	for (n=1;5*n/log(5*n)-4<prec*M_LN2;n++);
	// step size h = log(5n)/n
	arb_log_ui(h,5*n,prec);
	arb_div_ui(h,h,n,prec);

	arb_zero(res);
	for (k=-n;k<=n;k++) {
		// a = h*cosh(k*h)/cosh(sinh(k*h))^2, x = tanh(sinh(k*h))
		arb_mul_si(x,h,k,prec);
		arb_sinh_cosh(s,c,x,prec);
		arb_cosh(a,s,prec);
		arb_mul(a,a,a,prec);
		arb_div(a,c,a,prec);
		arb_mul(a,a,h,prec);
		arb_tanh(x,s,prec);

		// map the node from [-1,1] to [m,m+1]: z = (x+2m+1)/2
		arb_add_si(x,x,2*m+1,prec);
		arb_mul_2exp_si(acb_realref(z),x,-1);
		arb_zero(acb_imagref(z));
		f(z,i,m,z);
		// only the real part of f enters the sum
		arb_addmul(res,a,acb_realref(z),prec);
	}

	// to compute the error term, sample f on the circle
	// of radius 1 around m+1/2
	// use 2n angle intervals
	arb_zero(c);
	arb_set_si(s,2*m+1); arb_mul_2exp_si(s,s,-1); // s = m+1/2
	for (k=-n;k<n;k++) {
		// angle interval pi*(k+zeroone)/n; z covers the matching arc
		arb_add_si(x,zeroone,k,prec);
		arb_div_ui(acb_realref(z),x,n,prec);
		arb_zero(acb_imagref(z));
		acb_exp_pi_i(z,z,prec);
		arb_add(acb_realref(z),acb_realref(z),s,prec);
		f(z,i,m,z);
		acb_abs(a,z,prec);
		// c accumulates an enclosure of |f| over all arcs
		arb_union(c,c,a,prec);
	}
	// make c symmetric about zero
	arb_neg(a,c);
	arb_union(c,c,a,prec);

	// h = exp(4-5*n/log(5*n))
	arb_ui_div(h,5,h,prec);
	arb_sub_ui(h,h,4,prec);
	arb_neg(h,h);
	arb_exp(h,h,prec);

	// add the error interval, then halve for the change of variable
	arb_mul(c,c,h,prec);
	arb_add(res,res,c,prec);
	arb_mul_2exp_si(res,res,-1);
}

// floor of the lower endpoint of the ball x (endpoints taken at the
// working precision)
static long arbfloor(arb_srcptr x) {
	static int ready;
	static arf_t lower,upper;

	if (!ready) {
		arf_init(lower);
		arf_init(upper);
		ready = 1;
	}
	arb_get_interval_arf(lower,upper,x,prec);
	return arf_get_si(lower,ARF_RND_FLOOR);
}

// ceiling of the upper endpoint of the ball x (endpoints taken at the
// working precision)
static long arbceil(arb_srcptr x) {
	static int ready;
	static arf_t lower,upper;

	if (!ready) {
		arf_init(lower);
		arf_init(upper);
		ready = 1;
	}
	arb_get_interval_arf(lower,upper,x,prec);
	return arf_get_si(upper,ARF_RND_CEIL);
}

// Point contribution: s[i] += y*g(i,|x|*deltainv) for every index i whose
// window can be reached from |x|*deltainv, namely i between
// floor(|x|*deltainv-1) and ceil(|x|*deltainv+1), clamped to [0,ntest-1].
static void record(arb_t s[],arb_srcptr x,arb_srcptr y) {
	static int ready;
	static arb_t w,pos;
	int lo,hi,idx;

	if (!ready) {
		arb_init(w);
		arb_init(pos);
		ready = 1;
	}

	// pos = |x|*deltainv
	myabs(pos,x,prec);
	arb_mul(pos,pos,deltainv,prec);

	// index window around pos
	arb_sub_ui(w,pos,1,prec);
	lo = arbfloor(w);
	arb_add_ui(w,pos,1,prec);
	hi = arbceil(w);
	if (lo < 0)
		lo = 0;
	if (hi > ntest-1)
		hi = ntest-1;

	for (idx=lo;idx<=hi;idx++) {
		g(w,idx,pos);
		arb_addmul(s[idx],w,y,prec);
	}
}

// record y * integral from -infty to infty of the function f
// f is assumed to be even
// For each index i the integral is twice the sum of integral(f,i,j) over
// the unit intervals j = max(0,i-2) .. i+1. The ntest integrals are
// cached per (f, gsupp) pair, so later calls with the same pair only do
// the final s[i] += y*cache[i]. The cache grows by doubling and is never
// freed. Exits the program if the cache cannot be allocated or grown.
static void recordint(arb_t s[],function f,arb_srcptr y) {
	static arb_t t;
	static int ncache,mcache;
	static struct {
		function f;
		int gsupp;
		arb_t s[ntest];
	} *cache;
	int i,j,k;

	if (!mcache) {
		arb_init(t);
		cache = malloc(16*sizeof(cache[0]));
		if (!cache) {
			perror("recordint: malloc");
			exit(EXIT_FAILURE);
		}
		mcache = 16;
	}
	// look for an existing entry
	for (k=0;k<ncache;k++)
		if (cache[k].f == f && cache[k].gsupp == gsupp) break;
	if (k == ncache) {
		if (ncache == mcache) {
			// grow through a temporary so the old block is not lost on failure
			void *grown = realloc(cache,2*(size_t)mcache*sizeof(cache[0]));
			if (!grown) {
				perror("recordint: realloc");
				exit(EXIT_FAILURE);
			}
			cache = grown;
			mcache <<= 1;
		}
		for (i=0;i<ntest;i++) {
			arb_init(cache[k].s[i]);
			for (j=(i<2)?0:i-2;j<=i+1;j++) {
				integral(t,f,i,j);
				arb_mul_2exp_si(t,t,1); // doubled: f is even
				arb_add(cache[k].s[i],cache[k].s[i],t,prec);
			}
		}
		cache[k].f = f;
		cache[k].gsupp = gsupp;
		ncache++;
	}
	for (i=0;i<ntest;i++)
		arb_addmul(s[i],y,cache[k].s[i],prec);
}

// res = n^3*log(n)/6, with the value 0 at n = 0
static void l(arb_t res,int n) {
	if (n == 0) {
		arb_zero(res);
		return;
	}
	arb_log_ui(res,n,prec);
	arb_mul_ui(res,res,(long)n*n*n,prec);
	arb_div_ui(res,res,6,prec);
}

// record y * integral from 0 to infty g'(x)*log(x/4)
// The integral is added in closed form: s[0] and s[1] have their own
// formulas, and for i >= 2 it is a fourth difference of
// l(n) = n^3*log(n)/6 with weights 1,-4,6,-4,1.
static void recordlog(arb_t s[],arb_srcptr y) {
	static int init;
	static arb_t t1,t2,dinvy;
	int i,j,c[5]={1,-4,6,-4,1};

	if (!init) {
		arb_init(t1); arb_init(t2); arb_init(dinvy);
		init = 1;
	}

	// dinvy = y*deltainv, the common factor of every term
	arb_mul(dinvy,deltainv,y,prec);

	// s[0] += y*deltainv*(6*log(deltainv)+11)/9;
	arb_log(t1,deltainv,prec);
	arb_mul_ui(t1,t1,6,prec);
	arb_add_ui(t1,t1,11,prec);
	// t1 = 6*log(deltainv)+11 is reused for s[1] below
	arb_div_ui(t2,t1,9,prec);
	arb_addmul(s[0],t2,dinvy,prec);

	if (ntest > 1) {
		// s[1] += y*deltainv*(6*log(deltainv)+11+27*log(16./27))/36;
		arb_set_ui(t2,16);
		arb_div_ui(t2,t2,27,prec);
		arb_log(t2,t2,prec);
		arb_mul_ui(t2,t2,27,prec);
		arb_add(t2,t2,t1,prec);
		arb_div_ui(t2,t2,36,prec);
		arb_addmul(s[1],t2,dinvy,prec);
	}

	for (i=2;i<ntest;i++) {
		// s[i] -= y*deltainv*(l(i+2)-4*l(i+1)+6*l(i)-4*l(i-1)+l(i-2))/2;
		arb_zero(t2);
		for (j=-2;j<=2;j++) {
			l(t1,i+j);
			arb_addmul_si(t2,t1,c[j+2],prec);
		}
		arb_mul_2exp_si(t2,t2,-1);
		arb_submul(s[i],t2,dinvy,prec);
	}
}

// Integrand g(u)*cosh(u/2) with u = delta*dinvx, in the form expected by
// integral(). The cosh factor is computed before gint writes res, so res
// may alias dinvx.
static void cst(acb_t res,int i,int n,acb_srcptr dinvx) {
	static int ready;
	static acb_t weight;

	if (!ready) {
		acb_init(weight);
		ready = 1;
	}

	// weight = cosh(delta*dinvx/2)
	acb_mul_arb(weight,dinvx,delta,prec);
	acb_mul_2exp_si(weight,weight,-1);
	acb_cosh(weight,weight,prec);

	gint(res,i,n,dinvx);
	acb_mul(res,res,weight,prec);
}

// Contribution of the constant eigenfunction. It is recorded only when
// the current character has cond == 1 and no prime of the level has
// exponent above 1; the sign is +1 for an odd number of primes and -1
// for an even number.
static void constant(trace_t *tr) {
	arb_srcptr sign;
	int k;

	if (tr->chi->cond != 1)
		return;
	for (k=0;k<tr->nP;k++)
		if (tr->e[k] > 1)
			return;

	sign = (tr->nP & 1) ? one : negone;
	recordint(tr->s1,cst,sign);
	recordint(tr->s2,cst,sign);
}

// h(0) term: only present for level N = 1, where -1/4 times the
// integral of gint is added to both accumulators
static void h0(trace_t *tr) {
	static int ready;
	static arb_t coef;

	if (tr->N != 1)
		return;
	if (!ready) {
		// coef = -1/4, set up once
		arb_init(coef);
		arb_mul_2exp_si(coef,negone,-2);
		ready = 1;
	}
	recordint(tr->s1,gint,coef);
	recordint(tr->s2,gint,coef);
}

// Integrand for the identity term: gpxint(dinvx) divided by
// sinc(I*x/2)/2 with x = delta*dinvx. The divisor is computed before
// gpxint writes res, so res may alias dinvx.
static void id(acb_t res,int i,int n,acb_srcptr dinvx) {
	static int ready;
	static acb_t denom;

	if (!ready) {
		acb_init(denom);
		ready = 1;
	}

	// denom = sinc(I*delta*dinvx/2)/2
	acb_mul_arb(denom,dinvx,delta,prec);
	acb_mul_2exp_si(denom,denom,-1);
	acb_mul_onei(denom,denom);
	acb_sinc(denom,denom,prec);
	acb_mul_2exp_si(denom,denom,-1);

	gpxint(res,i,n,dinvx);
	acb_div(res,res,denom,prec);
}

// Local factor of the identity term at a prime p with level exponent e
// and conductor exponent s. For e == s it is p^(e-1)*(p+1); the other
// cases are tabulated separately for p = 2 and for odd p.
static int idcoeff(int p,int e,int s) {
	if (e == s)
		return intpow(p,e-1)*(p+1);

	if (p == 2)
		return (e <= 2) ? 1 : (3<<(e-3));

	// odd p from here on
	if (e == 1)
		return p-1;
	if (e == 2 && s == 0)
		return (p-1)*(p-1)/2;
	if (e == 2 && s == 1)
		return (p*p-1)/2;
	return intpow(p,e-3)*(p-1)*(p*p-1)/(2-(e&1));
}

// Identity term: multiply the local factors idcoeff(p,e,s) over all
// primes of the level, divide by -12 and record that multiple of the
// integral of id into s1.
static void identity(trace_t *tr) {
	static int ready;
	static arb_t coef;
	int k;

	if (!ready) {
		arb_init(coef);
		ready = 1;
	}

	// \prod_{p|N} idcoeff(p,e,s) / (-12)
	arb_one(coef);
	k = 0;
	while (k < tr->nP) {
		arb_mul_si(coef,coef,idcoeff(tr->P[k],tr->e[k],tr->chi->s[k]),prec);
		k++;
	}
	arb_div_si(coef,coef,-12,prec);
	recordint(tr->s1,id,coef);
}
 
// Integrand g'(x)/x*(x*log(sinh(x/4)*4/x)) with x = delta*dinvx.
// The logarithmic factor is computed before gpxint writes res, so res
// may alias dinvx.
static void par1(acb_t res,int i,int n,acb_srcptr dinvx) {
	static int ready;
	static acb_t w,arg;

	if (!ready) {
		acb_init(w);
		acb_init(arg);
		ready = 1;
	}

	acb_mul_arb(arg,dinvx,delta,prec); // arg = x
	// w = x*log(sinc(I*x/4))
	acb_mul_2exp_si(w,arg,-2);
	acb_mul_onei(w,w);
	acb_sinc(w,w,prec);
	acb_log(w,w,prec);
	acb_mul(w,w,arg,prec);

	gpxint(res,i,n,dinvx);
	acb_mul(res,res,w,prec);
}

// Integrand g'(x)/x*(x*log(sinh(x/2)*2/x)) with x = delta*dinvx.
// The logarithmic factor is computed before gpxint writes res, so res
// may alias dinvx.
static void par2(acb_t res,int i,int n,acb_srcptr dinvx) {
	static int ready;
	static acb_t w,arg;

	if (!ready) {
		acb_init(w);
		acb_init(arg);
		ready = 1;
	}

	acb_mul_arb(arg,dinvx,delta,prec); // arg = x
	// w = x*log(sinc(I*x/2))
	acb_mul_2exp_si(w,arg,-1);
	acb_mul_onei(w,w);
	acb_sinc(w,w,prec);
	acb_log(w,w,prec);
	acb_mul(w,w,arg,prec);

	gpxint(res,i,n,dinvx);
	acb_mul(res,res,w,prec);
}

// floor(log(x)/log(p)): how many times x can be divided by p (integer
// division) while it is still at least p
static inline int logp(int x,int p) {
	int steps = 0;

	while (x >= p) {
		x /= p;
		steps++;
	}
	return steps;
}

// Im(chi_p(z)) if chi_p is odd and n < 0
// Re(chi_p(z)) otherwise
// chi_p is the component of the current character tr->chi at the j-th
// prime. When that prime divides z the result is 1 if the conductor
// exponent s[j] is 0 and 0 otherwise.
static void chi_p(arb_t res,trace_t *tr,int j,int n,long z) {
	int t;
	long x;

	if (!(z % tr->P[j])) {
		if (tr->chi->s[j]) arb_zero(res); else arb_one(res);
		return;
	}
	// reduce z to [0,p^e)
	z %= tr->pe[j];
	if (z < 0) z += tr->pe[j];
	// compute x such that chi_p(z) = e(x/phi(p^e))
	if (tr->P[j] == 2 && tr->e[j] > 2) {
		// for 2^e, e > 2, only the even part of a[j] multiplies the log
		t = tr->discretelog[j][z];
		x = (long)t*(tr->chi->a[j] & ~1);

		// add 1/2 if chi_2 is odd and z is not a power of 5
		if ((tr->chi->a[j]&1) && (t>>(tr->e[j]-2)))
			x += tr->phipe[j]/2;
	} else
		x = (long)tr->chi->a[j]*tr->discretelog[j][z];
	x %= tr->phipe[j];

	// angle in units of pi is 2x/phi(p^e)
	arb_set_si(res,2*x);
	arb_div_ui(res,res,tr->phipe[j],prec);
	if ((tr->chi->a[j]&1) && n < 0)
		arb_sin_pi(res,res,prec);
	else
		arb_cos_pi(res,res,prec);
}

// \Psi_3^{\rm min}: local factor at p, determined by e = ord_p(N) and
// s = ord_p(q)
static int psi3min(int p,int N,int q) {
	int e = ordp(N,p);
	int s = ordp(q,p);

	if (e == s)
		return 4*e-1;
	if (e == 1)
		return 2;

	if (p == 2) {
		if (e == 2)
			return 1;
		return (e & 1) ? (1<<(e/2)) : (3<<(e/2-2));
	}

	// odd p, e >= 2
	if (e == 2) {
		if (s == 0)
			return (p-1)/2;
		if (s == 1)
			return (p+1)/2;
	}
	if (e & 1)
		return 2*(p-1)*intpow(p,(e-3)/2);
	return (p*p-1)/2*intpow(p,(e-4)/2);
}

// Add the parabolic terms for the current character tr->chi to the
// accumulators tr->s1 and tr->s2. With N1 = N/cond there are three cases:
//   N1 == 1: a sum over prime powers plus the g(0) and integral terms;
//   N1 prime with N1 not dividing cond: a g(0) term and a sum over the
//     powers of N1;
//   otherwise: a single g(0) term, and only when N1 is a prime power.
// Prime powers n are limited to n <= T = floor(exp(gsupp/2)).
static void parabolic(trace_t *tr) {
	static int init;
	static arb_t L,f1,f2,t;
	int i,j,m,n,p,T,N1;
 
	if (!init) {
		arb_init(L); arb_init(t);
		arb_init(f1); arb_init(f2);
		init = 1;
	}
 
	T = (int)expl((long double)gsupp/2);
	N1 = tr->N/tr->chi->cond;
	if (N1 == 1) {
		// sum over prime powers n = p^i <= T, p prime and coprime to cond
		for (p=2;p<=T;p++) {
			if (factor[p] || !(tr->chi->cond % p)) continue;
			arb_log_ui(L,p,prec);
			m = logp(T,p);
			for (i=1,n=p;i<=m;i++,n*=p) {
				// f1 = f2 = 2^(nP+1)*log(p)/n before the character values
				arb_div_si(f1,L,n,prec);
				arb_mul_2exp_si(f1,f1,tr->nP+1);
				arb_set(f2,f1);
				for (j=0;j<tr->nP;j++) {
					chi_p(t,tr,j,1,n);
					arb_mul(f1,f1,t,prec);
					chi_p(t,tr,j,-1,n);
					arb_mul(f2,f2,t,prec);
				}
				if (tr->chi->num_odd & 3) arb_neg(f2,f2);

				// recorded at the point 2*i*log(p)
				arb_mul_si(t,L,2*i,prec);
				record(tr->s1,t,f1);
				record(tr->s2,t,f2);
			}
		}

		// g(0) term for n=1
		// 2^omega(N)*(2*Euler+log(8*Pi)-2*log(N)+log(sqfreepart(N))/2)
		arb_mul_2exp_si(L,euler,1);
		arb_add(L,L,logpi,prec);
		arb_addmul_ui(L,ln2,3,prec);
		arb_log_ui(t,tr->N,prec);
		arb_submul_ui(L,t,2,prec);
		// m = product of the distinct primes dividing N
		for (j=0,m=1;j<tr->nP;j++)
			m *= tr->P[j];
		arb_log_ui(t,m,prec);
		arb_mul_2exp_si(t,t,-1);
		arb_add(L,L,t,prec);
		arb_mul_2exp_si(L,L,tr->nP);
		record(tr->s1,zero,L);

		// integral terms for n=1
		// -2^omega(N)*(2*integral(0..oo,log(u/4)*g'(u)))
		// -2^omega(N)*(2*integral(0..oo,log(sinh(u/4)/(u/4))*g'(u)))
		arb_mul_2exp_si(L,negone,tr->nP);
		recordint(tr->s1,par1,L);
		arb_mul_2exp_si(L,L,1);
		recordlog(tr->s1,L);

		// the n=-1 counterparts are added only when no local exponent
		// a[i] is odd
		if (!tr->chi->num_odd) {
			// g(0) term for n=-1
			// 2^omega(N)*(Euler+log(4*Pi)-log(N)+(N%2)*log(2)/2)
			arb_add(L,euler,logpi,prec);
			arb_addmul_ui(L,ln2,2,prec);
			arb_log_ui(t,tr->N,prec);
			arb_sub(L,L,t,prec);
			if (tr->N & 1) {
				arb_mul_2exp_si(t,ln2,-1);
				arb_add(L,L,t,prec);
			}
			arb_mul_2exp_si(L,L,tr->nP);
			record(tr->s2,zero,L);

			// integral terms for n=-1
			// -2^omega(N)*integral(0..oo,log(u/4)*g'(u))
			// -2^omega(N)*integral(0..oo,log(sinh(u/2)/(u/2))*g'(u))
			arb_mul_2exp_si(L,negone,tr->nP-1);
			recordint(tr->s2,par2,L);
			arb_mul_2exp_si(L,L,1);
			recordlog(tr->s2,L);
		}
	} else if (!factor[N1] && tr->chi->cond % N1) {
		// N1 is prime (factor[N1] == 0) and does not divide cond
		// t = -2^omega(N)*log(N1);
		p = N1;
		arb_log_ui(L,p,prec);
		arb_mul_2exp_si(t,L,tr->nP);
		arb_neg(t,t);
		record(tr->s1,zero,t);

		if (!tr->chi->num_odd) {
			// s2 gets -3*2^(nP-2)*log(2) for N1 == 2, else half of t
			if (N1 == 2) {
				arb_mul_si(t,ln2,-3,prec);
				arb_mul_2exp_si(t,t,tr->nP-2);
			} else
				arb_mul_2exp_si(t,t,-1);
			record(tr->s2,zero,t);
		}

		// sum over the powers n = p^i <= T of p = N1
		m = logp(T,p);
		for (i=1,n=p;i<=m;i++,n*=p) {
			// f1 = f2 = -2^nP*log(p)/n before the character values
			arb_mul_2exp_si(f1,L,tr->nP);
			arb_div_si(f1,f1,-n,prec);
			arb_set(f2,f1);
			for (j=0;j<tr->nP;j++) {
				chi_p(t,tr,j,1,n);
				arb_mul(f1,f1,t,prec);
				chi_p(t,tr,j,-1,n);
				arb_mul(f2,f2,t,prec);
			}
			if (tr->chi->num_odd & 3) arb_neg(f2,f2);

			arb_mul_si(t,L,2*i,prec);
			record(tr->s1,t,f1);
			record(tr->s2,t,f2);
		}
	} else {
		// remaining case: contributes only if N1 is a power of one prime p
		if (!(p=factor[N1])) p = N1;
		if (N1 == intpow(p,ordp(N1,p))) {
			// s1 term: -2^(nP-1)*psi3min(p,N,cond)*log(p)
			arb_log_ui(L,p,prec);
			arb_mul_si(t,L,-psi3min(p,tr->N,tr->chi->cond),prec);
			arb_mul_2exp_si(t,t,tr->nP-1);
			record(tr->s1,zero,t);
			if (!tr->chi->num_odd && (N1 == 4 || N1 == 8) && (tr->chi->cond & 1)) {
				// s2 term: -2^(nP-2)*log(2)
				arb_mul_2exp_si(t,ln2,tr->nP-2);
				arb_neg(t,t);
				record(tr->s2,zero,t);
			}
		}
	}
}

// Integrand cosh(x/2)/(3*cosh(x/2)^2+sinh(x/2)^2)*g(x) with
// x = delta*dinvx. The weight is computed before gint writes res, so res
// may alias dinvx.
static void e1(acb_t res,int i,int n,acb_srcptr dinvx) {
	static int ready;
	static acb_t w,ch,sh;

	if (!ready) {
		acb_init(w);
		acb_init(ch);
		acb_init(sh);
		ready = 1;
	}

	// sh, ch = sinh, cosh of x/2
	acb_mul_arb(w,dinvx,delta,prec);
	acb_mul_2exp_si(w,w,-1);
	acb_sinh_cosh(sh,ch,w,prec);

	// w = ch/(3*ch^2+sh^2)
	acb_mul(w,ch,ch,prec);
	acb_mul_ui(w,w,3,prec);
	acb_addmul(w,sh,sh,prec);
	acb_div(w,ch,w,prec);

	gint(res,i,n,dinvx);
	acb_mul(res,res,w,prec);
}

// Integrand g(x)/cosh(x/2) with x = delta*dinvx. The divisor is computed
// before gint writes res, so res may alias dinvx.
static void e2(acb_t res,int i,int n,acb_srcptr dinvx) {
	static int ready;
	static acb_t denom;

	if (!ready) {
		acb_init(denom);
		ready = 1;
	}

	// denom = cosh(delta*dinvx/2)
	acb_mul_arb(denom,dinvx,delta,prec);
	acb_mul_2exp_si(denom,denom,-1);
	acb_cosh(denom,denom,prec);

	gint(res,i,n,dinvx);
	acb_div(res,res,denom,prec);
}

// Local factor at the j-th prime p of the level, used by Sproduct().
// Arguments as passed by the callers: t is the trace, n the sign (1 or
// -1), d the discriminant and l the cofactor with t^2-4n = d*l^2.
// The result is an integer weight times a value of chi_p (or zero); the
// weight depends on p == 2 or odd, on whether the conductor exponent s
// equals the level exponent e, on g = ord_p(d)+2*ord_p(l) and on the
// symbol k = (d/p).
static void Sp(arb_t res,trace_t *tr,int j,int t,int n,long d,int l) {
	int p,e,s,f,g,k,pe1,pe,pf,x;
	long lsqrtd;

	p = tr->P[j];
	e = tr->e[j];
	s = tr->chi->s[j];
	f = ordp(l,p);
	g = ordp(d,p)+2*f;
	k = kronecker(d,p);
	pe1 = intpow(p,e-1);
	pe = p*pe1;
	pf = intpow(p,f);
	// lsqrtd = l times the tabulated square root of d modulo 4*p^e
	x = d%(4*pe);
	if (x < 0) x += 4*pe;
	lsqrtd = (long)l*tr->sqrtmod4pe[j][x];

	if (p == 2) {
		if (s == e) {
			// p = 2, conductor exponent equal to the level exponent
			if (g >= 2*e) {
				x = ((d&1)<<(g/2+1))+(k-1)*((3<<(e-1))-(1<<(g/2+1)));
				if (g == 2*e) x = -x;
				chi_p(res,tr,j,n,t/2);
				arb_mul_si(res,res,x,prec);
			} else if (k == 1) {
				chi_p(res,tr,j,n,(t+lsqrtd)/2);
				arb_mul_si(res,res,2*pf,prec);
			} else
				arb_zero(res);
		} else {
			// p = 2, smaller conductor exponent: x is a small integer
			// weight, later scaled by (k-1) and 2^(e-3)
			x = 0;
			if (e >= 3) {
				if (g > e)
					x = 3;
				else if (g == e)
					x = 2*(e&1)-3;
				else if (g == e-1 && (e&1))
					x = (1-(d&1)*4)*(s <= 2 ? -1 : 1);
			} else if (e == 2)
				x = (g > 0) ? 2 : -1;
			else
				x = 4;

			x *= (k-1);
			if (x) {
				chi_p(res,tr,j,n,t/2);
				arb_mul_si(res,res,x,prec);
				arb_mul_2exp_si(res,res,e-3);
			} else
				arb_zero(res);
		}
	} else {
		if (s == e) {
			// odd p, conductor exponent equal to the level exponent
			if (g >= 2*e-1) {
				// (t+pe*(t&1))/2: an odd t is made even by adding the odd pe
				chi_p(res,tr,j,n,(t+pe*(t&1))/2);
				arb_mul_si(res,res,2*pf+(1-k)*(2*pf-(pe+pe/p))/(p-1),prec);
			} else if (k == 1) {
				chi_p(res,tr,j,n,(t+lsqrtd)/2);
				arb_mul_si(res,res,2*pf,prec);
			} else
				arb_zero(res);
		} else {
			// odd p, smaller conductor exponent
			if (e != 1 && kronecker(n,p) != 1 || g < e-2 || k == 1)
				arb_zero(res);
			else {
				x = pe1;
				if (e == 2)
					x -= 1-2*(tr->chi->a[j]&1);
				if (e > 2)
					x -= pe1/(p*p);
				if (g == e-2)
					x -= (pe1-k*pe1/p);
				// halve when e is even
				x >>= (1-(e&1));
				x *= (k-1);
				chi_p(res,tr,j,n,(t+pe*(t&1))/2);
				arb_mul_si(res,res,x,prec);
			}
		}
	}
}

// res = product of the local factors Sp() over the primes of the level,
// negated when n < 0 and (num_odd & 3) is nonzero, times one factor
//   (p^ord-1)/(p-1)*(p-(d/p))+1
// for every prime power p^ord exactly dividing l with p not dividing N.
static void Sproduct(arb_t res,trace_t *tr,int t,int n,long d,int l) {
	static int ready;
	static arb_t local;
	int idx,rest;

	if (!ready) {
		arb_init(local);
		ready = 1;
	}

	arb_one(res);
	for (idx=0;idx<tr->nP;idx++) {
		Sp(local,tr,idx,t,n,d,l);
		arb_mul(res,res,local,prec);
	}
	if (n < 0 && (tr->chi->num_odd & 3))
		arb_neg(res,res);

	// strip one prime power off l at a time
	rest = l;
	while (rest > 1) {
		int q,ord;
		long qpow;

		q = factor[rest];
		if (!q)
			q = rest; // rest itself is prime
		ord = ordp(rest,q);
		qpow = intpow(q,ord);
		if (tr->N % q)
			arb_mul_si(res,res,(qpow-1)/(q-1)*(q-kronecker(d,q))+1,prec);
		rest /= qpow;
	}
}

// Elliptic terms, both added to s1: the t = 1, d = -3 term is weighted by
// 2/3 and integrated against e1; the t = 0, d = -4 term is weighted by
// 1/8 and integrated against e2.
static void elliptic(trace_t *tr) {
	static int ready;
	static arb_t weight;

	if (!ready) {
		arb_init(weight);
		ready = 1;
	}

	// t = 1, d = -3
	Sproduct(weight,tr,1,1,-3,1);
	arb_mul_2exp_si(weight,weight,1);
	arb_div_si(weight,weight,3,prec);
	recordint(tr->s1,e1,weight);

	// t = 0, d = -4
	Sproduct(weight,tr,0,1,-4,1);
	arb_mul_2exp_si(weight,weight,-3);
	recordint(tr->s1,e2,weight);
}

// sorted list of traces already handled as a power of a smaller unit
static int list[1<<16];
// trace of Tn
// Adds the hyperbolic terms: into tr->s1 for n > 0 and into tr->s2
// otherwise. The record-skipping below lines up with the loop start
// t = n+2 only for n = 1 and n = -1.
// Class numbers are read as consecutive 4-byte records, one per t, from
// "clt2m4.new" (n > 0) or "clt2p4.new" (otherwise). The program exits
// with a message if the file cannot be opened or a needed record is
// missing; previously a missing file passed NULL to fread and a short
// file silently reused a stale class number.
static void hyperbolic(trace_t *tr,int n) {
	static int init;
	static arb_t temp,logeps1,f;
	const char *path;
	int t,T,length=0,x,y,i,j,k,h,u0,nskip,have_h,*sqf;
	long int d,x4,t4,tnext;
	FILE *fp;
	arb_t *s;

	if (!init) {
		arb_init(f);
		arb_init(temp); arb_init(logeps1);
		init = 1;
	}

	T = (int)(2*coshl((long double)gsupp/2));
	if (n > 0) {
		path = "clt2m4.new";
		nskip = 3; // skip t = 0,1,2
		sqf = sqfm;
		s = tr->s1;
	} else {
		path = "clt2p4.new";
		nskip = 1; // skip t = 0
		sqf = sqfp;
		s = tr->s2;
	}
	// the table is raw binary data, so open it in binary mode
	fp = fopen(path,"rb");
	if (!fp) {
		perror(path);
		exit(EXIT_FAILURE);
	}
	for (i=0;i<nskip;i++)
		if (fread(&h,4,1,fp) != 1) {
			fprintf(stderr,"%s: class number table is truncated\n",path);
			exit(EXIT_FAILURE);
		}

	for (t=n+2;t<=T;t++) {
		/* read class number */
		have_h = (fread(&h,4,1,fp) == 1);
		if (length > 0 && t == *list) {
			// t was already handled as a power of a smaller unit
			for (i=1;i<length;i++) list[i-1] = list[i];
			length--;
		} else {
			if (!have_h) {
				fprintf(stderr,"%s: no class number for t=%d\n",path,t);
				exit(EXIT_FAILURE);
			}
			/* compute fundamental discriminant */
			t4 = (long)t*t-4*n;
			d = t4/((long)sqf[t]*sqf[t]);

			// log((t+sqrt(t4))/2)
			arb_sqrt_ui(temp,t4,prec);
			arb_add_ui(temp,temp,t,prec);
			arb_mul_2exp_si(temp,temp,-1);
			arb_log(temp,temp,prec);

			/* compute narrow class number and log(epsilon_1) */
			if (n > 0) {
				i = (int)sqrtl((long double)(t-2));
				if (t-2 != i*i) h <<= 1;
				arb_set(logeps1,temp);
			} else
				arb_mul_2exp_si(logeps1,temp,1);

			/* loop over all powers of this unit */
			for (i=0,x=2,y=0,tnext=t,k=1;tnext<=T;
					++k,tnext=((long)x*t+y*t4)>>1) {
				y = (x+y*t)>>1, x = (int)tnext;
				// for n < 0 only odd powers contribute here
				if (n < 0 && !(k&1)) continue;

				/* compute u s.t. x^2-du^2 = 4n */
				x4 = (long)x*x-4*n;
				u0 = (int)sqrtl((long double)(x4/d));
				Sproduct(f,tr,x,n,d,u0);

				// extra factor of 2 to add contribution from t and -t
				arb_mul_si(f, f, 2*h, prec);
				arb_mul(f, f, logeps1, prec);
				arb_sqrt_ui(temp,x4,prec);
				arb_div(f,f,temp,prec);

				// temp = 2 * log((x+\sqrt{x^2-4n})/2)
				arb_add_si(temp, temp, x, prec);
				arb_mul_2exp_si(temp,temp,-1);
				arb_log(temp, temp, prec);
				arb_mul_2exp_si(temp,temp,1);

				record(s,temp,f);

				/* sort into table */
				if (x > t) {
					for (;i<length && list[i] <= x;i++);
					for (j=length-1;j>=i;j--) list[j+1] = list[j];
					list[i] = x, length++;
				}
			}
		}
	}
	fclose(fp);
}

// Value of the decimal digit string s reduced modulo p. Digits are
// consumed from the last character to the first, each weighted by the
// matching power of 10 mod p. An empty string gives 0.
static int strmodp(char *s,int p) {
	int weight = 1;
	int acc = 0;
	int pos = (int)strlen(s);

	while (pos-- > 0) {
		int digit = s[pos]-'0';
		acc = (acc+weight*digit) % p;
		weight = (int)((long)weight*10%p);
	}
	return acc;
}

// Nonzero exactly when x-gsupp is certainly positive, i.e. the whole
// ball x lies strictly above gsupp
static int outside_support(arb_srcptr x) {
	static int ready;
	static arb_t excess;

	if (!ready) {
		arb_init(excess);
		ready = 1;
	}
	arb_sub_ui(excess,x,gsupp,prec);
	return arb_is_positive(excess);
}

// Extra terms attached to real quadratic fields, for squarefree level
// only: returns at once unless every e[j] is 1 and every s[j] is nonzero.
// Loops over d = 5, 9, 13, ... dividing dmax (the product of the primes
// whose exponent a[j] equals P[j]>>1) for which no prime of the level
// has symbol (d/P[j]) == -1. Field data comes from the table quad[]
// (fields unit, norm, h), defined outside the code visible here --
// presumably the fundamental unit's trace as a decimal string, its norm
// and the class number; confirm in quad.c.
static void special(trace_t *tr) {
	static int init;
	static arb_t t1,t2,f0,f1,r,z[nPmax];
	int i,j,m,d,dmax,t;
	int lifts,num,den,c[nPmax];

	if (!init) {
		arb_init(t1); arb_init(t2);
		arb_init(f0); arb_init(f1); arb_init(r);
		for (j=0;j<nPmax;j++) arb_init(z[j]);
		init = 1;
	}

	// we only handle squarefree levels
	for (j=0;j<tr->nP;j++)
		if (tr->e[j] > 1 || !tr->chi->s[j])
			return;

	for (j=0,dmax=1;j<tr->nP;j++)
		if (tr->chi->a[j] == (tr->P[j]>>1))
			dmax *= tr->P[j];
	for (d=5;d<=dmax;d+=4) {
		if (dmax % d) continue;
		for (j=0;j<tr->nP;j++)
			if (kronecker(d,tr->P[j])==-1) break;
		if (j < tr->nP) continue;

		// compute regulator
		// r = log((u+sqrt(u^2-4*norm))/2) with u = quad[d].unit
		arb_set_str(t1,quad[d].unit,prec);
		arb_mul(r,t1,t1,prec);
		arb_sub_si(r,r,4*quad[d].norm,prec);
		arb_sqrt(r,r,prec);
		arb_add(r,r,t1,prec);
		arb_mul_2exp_si(r,r,-1);
		arb_log(r,r,prec);

		// r = 2*regulator
		arb_mul_2exp_si(r,r,1);
		lifts = 1;
		// for each prime not dividing d: c[j] is the exponent of the
		// character value at the unit, z[j] = 2*c[j]/(P[j]-1) its angle in
		// units of pi; primes dividing d are marked with c[j] = -1
		for (j=0;j<tr->nP;j++)
			if (d % tr->P[j]) {
				m = strmodp(quad[d].unit,tr->P[j]);
				t = (m*m-4*quad[d].norm+4*tr->P[j]) % (4*tr->P[j]);
				t = (m+tr->sqrtmod4pe[j][t])/2 % tr->P[j];
				c[j] = tr->discretelog[j][t] * tr->chi->a[j] % (tr->P[j]-1);
				arb_set_ui(z[j],2*c[j]);
				arb_div_ui(z[j],z[j],tr->P[j]-1,prec);
				lifts <<= 1;
			} else {
				arb_zero(z[j]);
				c[j] = -1;
			}

		// add 1/4-eigenspace back in
		// bit m&1 of the lift index i selects, prime by prime, between c[j]
		// and its counterpart; num/den is the combined exponent as a
		// fraction modulo 1
		for (i=0;i<lifts;i++) {
			m = i, num = 0, den = 1;
			for (j=0;j<tr->nP;j++)
				if (c[j] >= 0) {
					num *= (tr->P[j]-1);
					if (m & 1)
						num += ((tr->chi->a[j]&1)*(tr->P[j]-1)/2-c[j])*den;
					else
						num += c[j]*den;
					den *= (tr->P[j]-1);
					num %= den;
					m >>= 1;
				}
			if (num < 0) num += den;
			if (quad[d].norm > 0) {
				// num/den == 0
				if (!num) {
					arb_set_ui(t1,quad[d].h);
					recordint(tr->s1,gint,t1);
				}
			} else {
				if (!num) {
					arb_set_ui(t1,quad[d].h);
					arb_mul_2exp_si(t1,t1,-1);
					recordint(tr->s1,gint,t1);
					recordint(tr->s2,gint,t1);
				}
				// num/den == 1/2
				if (den == 2*num) {
					arb_set_ui(t1,quad[d].h);
					arb_mul_2exp_si(t1,t1,-1);
					recordint(tr->s1,gint,t1);
					arb_neg(t1,t1);
					recordint(tr->s2,gint,t1);
				}
			}
		}

		// f0 = -2*regulator*quad[d].h*lifts;
		arb_mul_si(f0,r,-quad[d].h*lifts,prec);
		record(tr->s1,zero,f0);
		// terms at the multiples i*r of r, until i*r is certainly beyond
		// gsupp
		if (quad[d].norm > 0)
			for (i=1;;i++) {
				arb_mul_ui(t1,r,i,prec);
				if (outside_support(t1)) break;
				arb_mul_2exp_si(f1,f0,1);
				for (j=0;j<tr->nP;j++)
					if (c[j] >= 0) {
						arb_mul_ui(t2,z[j],i,prec);
						arb_cos_pi(t2,t2,prec);
						arb_mul(f1,f1,t2,prec);
					}
				record(tr->s1,t1,f1);
			}
		else
			for (i=1;;i++) {
				arb_mul_ui(t1,r,i,prec);
				if (outside_support(t1)) break;
				arb_mul_2exp_si(f1,f0,1);
				// t counts the sine factors (a[j] and i both odd)
				for (t=j=0;j<tr->nP;j++)
					if (c[j] >= 0) {
						arb_mul_ui(t2,z[j],i,prec);
						if (tr->chi->a[j] & i & 1) {
							arb_sin_pi(t2,t2,prec);
							t++;
						} else
							arb_cos_pi(t2,t2,prec);
						arb_mul(f1,f1,t2,prec);
					}
				if (t & 2) arb_neg(f1,f1);
				// odd multiples go to s2, even multiples to s1
				record((i&1)?tr->s2:tr->s1,t1,f1);
			}
	}
}

// work matrix for gauss(): left half is the matrix to invert, right half
// the augmentation; after gauss() the left half holds the inverse
static long double A[points][points*2];

/* compute and invert the quadratic form matrix */
// The matrix has entries (s[j+l]+s[|j-l|])/2 built from the midpoints of
// the balls s[] only, so the inverse is a floating-point approximation
// and not a rigorous enclosure. Inversion is Gauss-Jordan elimination
// with row pivoting; a zero pivot is not checked for.
static void gauss(arb_t s[]) {
	int j,j0,k,l,c=points*2;
	long double t,max;

	/* compute matrix augmented with 1's */
	// lower triangle first, then mirrored into the upper triangle
	for (j=0;j<points;j++)
		for (l=0;l<=j;l++)
			A[j][l] = (arb_get_d(s[j+l])+arb_get_d(s[j-l]))/2;
	for (j=0;j<points;j++)
		for (l=0;l<j;l++)
			A[l][j] = A[j][l];
	// right half = identity
	for (j=0;j<points;j++) {
		for (l=points;l<c;l++)
			A[j][l] = 0;
		A[j][j+points] = 1;
	}

	for (j=0;j<points;j++) {
		/* find the largest pivot */
		max = fabsl(A[j][j]), j0 = j;
		for (k=j+1;k<points;k++)
			if ( (t = fabsl(A[k][j])) > max)
				max = t, j0 = k;
		/* exchange current row with pivot row */
		for (l=j;l<c;l++)
			t = A[j][l], A[j][l] = A[j0][l], A[j0][l] = t;
		/* divide out pivot */
		t = 1/A[j][j];
		for (l=j;l<c;l++)
			A[j][l] *= t;
		/* eliminate lower columns */
		for (k=j+1;k<points;k++)
			for (t=A[k][j],l=j+1;l<c;l++)
				A[k][l] -= t * A[j][l];
	}

	/* eliminate upper columns */
	for (j=points-1;j>0;j--)
		for (k=j-1;k>=0;k--)
			for (t=A[k][j],l=j;l<c;l++)
				A[k][l] -= A[j][l]*t;
	
	/* copy over original matrix */
	for (j=0;j<points;j++)
		for (l=0;l<points;l++)
			A[j][l] = A[j][l+points];
}

/*
 * Evaluate the normalised quadratic form at the vector y of row sums of
 * the inverse matrix produced by gauss(s):
 *
 *     q = sum_{j,l} y[j]*y[l]*(s[j+l]+s[j-l])/2  /  (sum_l y[l])^2,
 *
 * with q computed in ball arithmetic at precision prec.
 *
 * Returns 0 when q - 1 is certified negative, q is not certified negative,
 * and no index j has y[j-1] certified nonnegative with y[j] certified
 * negative; otherwise returns arb_get_d(q).
 *
 * The arb temporaries are static and initialised on the first call only.
 */
static double minimize(arb_t s[]) {
	static int ready;
	static arb_t form,coef,tmp,vec[points];
	long double rowsum;
	int i,k;

	if (!ready) {
		arb_init(form); arb_init(coef); arb_init(tmp);
		for (i=0;i<points;i++) arb_init(vec[i]);
		ready = 1;
	}

	/* vec[i] = sum of row i of the inverse matrix, rounded to double */
	gauss(s);
	for (i=0;i<points;i++) {
		rowsum = 0;
		for (k=0;k<points;k++)
			rowsum += A[i][k];
		arb_set_d(vec[i],(double)rowsum);
	}

	/* accumulate the form over the lower triangle; off-diagonal terms
	   carry the full sum s[i+k]+s[i-k], the diagonal term half of it */
	arb_zero(form);
	for (i=0;i<points;i++) {
		for (k=0;k<i;k++) {
			arb_add(coef,s[i+k],s[i-k],prec);
			arb_mul(tmp,vec[i],vec[k],prec);
			arb_addmul(form,tmp,coef,prec);
		}
		arb_add(coef,s[i+i],s[0],prec);
		arb_mul_2exp_si(coef,coef,-1);
		arb_mul(tmp,vec[i],vec[i],prec);
		arb_addmul(form,tmp,coef,prec);
	}

	/* normalise by the square of the sum of the vector entries */
	arb_zero(tmp);
	for (k=0;k<points;k++)
		arb_add(tmp,tmp,vec[k],prec);
	arb_mul(tmp,tmp,tmp,prec);
	arb_div(form,form,tmp,prec);

	/* only a form value certified below 1 can yield the 0 verdict */
	arb_sub_ui(tmp,form,1,prec);
	if (!arb_is_negative(tmp))
		return arb_get_d(form);
	if (arb_is_negative(form))
		return arb_get_d(form);
	for (i=1;i<points;i++)
		if (arb_is_nonnegative(vec[i-1]) && arb_is_negative(vec[i]))
			return arb_get_d(form);
	return 0;
}

/*
 * Evaluate the current character tr->chi at n.
 *
 * For each prime power pe[j] the local exponent x with
 * chi_p(n) = e(x/phi(p^e)) is obtained from the discrete log table, and
 * the local exponents are merged into a single numerator over the product
 * of all phipe[j].  The numerator, reduced modulo that product, is
 * returned.  n is reduced into [0,pe[j]) before the table lookup.
 */
static int chi_eval(trace_t *tr,int n) {
	long acc = 0,modulus = 1,expo;
	int j,res,dlog,a;

	for (j=0;j<tr->nP;j++) {
		a = tr->chi->a[j];

		// nonnegative residue of n mod p^e
		res = n % tr->pe[j];
		if (res < 0)
			res += tr->pe[j];
		dlog = tr->discretelog[j][res];

		if (tr->P[j] == 2 && tr->e[j] > 2) {
			// the low bit of a encodes the odd part of chi_2,
			// the remaining bits the even part
			expo = (long)dlog*(a & ~1);

			// add 1/2 if chi_2 is odd and res is not a power of 5
			if ((a&1) && (dlog>>(tr->e[j]-2)))
				expo += tr->phipe[j]/2;
		} else {
			expo = (long)a*dlog;
		}

		// acc/modulus + expo/phipe[j] over the common denominator
		acc = tr->phipe[j]*acc+expo*modulus;
		modulus *= tr->phipe[j];
		acc %= modulus;
	}
	return (int)acc;
}

/*
 * For every dihedral_list entry of conductor tr->N whose tabulated
 * (argument, value) pairs all agree with chi_eval() for the current
 * character, record -1 into s1 and +1 or -1 (according to the entry's
 * parity flag) into s2 via recordint(...,gint,...).
 * Both the list and each entry's val table are zero-terminated.
 */
static void dihedral(trace_t *tr) {
	int entry,k;

	for (entry=0;dihedral_list[entry].cond;entry++) {
		if (dihedral_list[entry].cond != tr->N)
			continue;

		// advance while the tabulated values match the character
		k = 0;
		while (dihedral_list[entry].val[k][0]
			&& chi_eval(tr,dihedral_list[entry].val[k][0])
				== dihedral_list[entry].val[k][1])
			k++;

		// stopped before the terminator: some value disagreed
		if (dihedral_list[entry].val[k][0])
			continue;

		recordint(tr->s1,gint,negone);
		if (dihedral_list[entry].parity)
			recordint(tr->s2,gint,one);
		else
			recordint(tr->s2,gint,negone);
	}
}

/*
 * For every exotic_list entry of conductor tr->N whose exponent vector a[]
 * equals that of the current character, record -1 into s1 and +1 or -1
 * (according to the entry's parity flag) into s2 via
 * recordint(...,gint,...).  The list is terminated by cond == 0.
 */
static void exotic(trace_t *tr) {
	int entry,k;

	for (entry=0;exotic_list[entry].cond;entry++) {
		if (exotic_list[entry].cond != tr->N)
			continue;

		// count leading components that agree
		k = 0;
		while (k < tr->nP && exotic_list[entry].a[k] == tr->chi->a[k])
			k++;
		if (k != tr->nP)
			continue;

		recordint(tr->s1,gint,negone);
		if (exotic_list[entry].parity)
			recordint(tr->s2,gint,one);
		else
			recordint(tr->s2,gint,negone);
	}
}

/*
 * Fill tr->s1[] and tr->s2[] for character index c with support gs.
 *
 * Sets the globals gsupp, delta = gsupp/(2*points) and deltainv = 1/delta,
 * selects tr->chi, zeroes both arrays, and then accumulates the
 * contributions of identity, elliptic, hyperbolic (+1 and -1), parabolic,
 * constant, h0, dihedral and special, in that order.  Both arrays are then
 * divided by 2^mult, where mult is derived below from the local data
 * (P, e, chi->a, chi->s) at each prime, and finally exotic() is applied
 * (after the scaling, so its contribution is not divided).
 */
static void trace(trace_t *tr,int c,int gs) {
	int i,j,mult,oddtwist;

	// delta = gsupp/(2*points), deltainv = 1/delta
	gsupp = gs;
	arb_set_ui(delta,gsupp);
	arb_div_ui(delta,delta,2*points,prec);
	arb_set_ui(deltainv,2*points);
	arb_div_ui(deltainv,deltainv,gsupp,prec);

	tr->chi = &tr->allchi[c];
	for (i=0;i<ntest;i++) {
		arb_zero(tr->s1[i]);
		arb_zero(tr->s2[i]);
	}

	identity(tr);
	elliptic(tr);
	hyperbolic(tr,1);
	hyperbolic(tr,-1);
	parabolic(tr);
	constant(tr);
	h0(tr);
	dihedral(tr);
	special(tr);

	mult = 0;
	oddtwist = 0;
	for (j=0;j<tr->nP;j++) {
		int p = tr->P[j],e = tr->e[j],a = tr->chi->a[j];
		int full = (e == tr->chi->s[j]);	// e equals chi->s at this prime

		if (p == 2) {
			if (e <= 3) {
				if (full) {
					mult++;
					if (a & 1)
						oddtwist = 1;
				}
			} else if (!full) {
				// here e >= 4; e >= 6 counts twice
				mult += (e >= 6) ? 2 : 1;
				oddtwist = 1;
			}
		} else if ((e == 1 && a == (p>>1)) || (e >= 2 && !full)) {
			mult++;
			if (p & 2)	// p = 3 mod 4
				oddtwist = 1;
		}
	}
	mult -= oddtwist;

	// divide everything accumulated so far by 2^mult
	for (i=0;i<ntest;i++) {
		arb_mul_2exp_si(tr->s1[i],tr->s1[i],-mult);
		arb_mul_2exp_si(tr->s2[i],tr->s2[i],-mult);
	}

	exotic(tr);
}

// smallest square multiple of lcm(N,q^2):
// at each prime take the exponent max(e rounded up to even, 2*s),
// where s is the character's entry allchi[c].s[j]
static long square_multiple(trace_t *tr,int c) {
	long res = 1;
	int j,even_e,twice_s;

	for (j=0;j<tr->nP;j++) {
		even_e = tr->e[j]+(tr->e[j]&1);
		twice_s = 2*tr->allchi[c].s[j];
		res *= intpow(tr->P[j],even_e < twice_s ? twice_s : even_e);
	}
	return res;
}

// least common multiple; the product is formed in long before dividing
static inline int lcm(int x,int y) {
	long prod = (long)x*y;
	return prod/gcd(x,y);
}

// order of the local character with exponent a, where phi = phi(p^e).
// For p = 2 with phi >= 4 the low bit of a and the remaining bits are
// treated as two separate components (of orders dividing 2 and phi/2)
// and the orders are combined by lcm; otherwise the order is
// phi/gcd(phi,a).
static int ord_chi_p(int a,int p,int phi) {
	int half;

	if (p <= 2 && phi >= 4) {
		half = phi>>1;
		return lcm((a&1)+1,half/gcd(half,a>>1));
	}
	return phi/gcd(phi,a);
}

// Store in a[0..nP-1] the exponent vector of the m-th power of character c.
// For odd p, and for p = 2 with e <= 2, the exponent is m*a mod phi(p^e).
// For p = 2 with e > 2 the low bit of a is handled separately from the
// remaining bits: the upper part is multiplied by m modulo phi/2, and the
// low bit survives only when m is odd.
// The result is delivered through a; the return value is always 0 (the
// function is declared int, so it must not fall off the end).
static int chi_power(int *a,trace_t *tr,int c,int m) {
	int j;
	for (j=0;j<tr->nP;j++)
		if (tr->P[j] > 2 || tr->e[j] <= 2)
			a[j] = (long)m*tr->allchi[c].a[j] % tr->phipe[j];
		else
			a[j] = 2*((long)m*(tr->allchi[c].a[j]>>1) % (tr->phipe[j]>>1))
				+ (m & tr->allchi[c].a[j] & 1);
	return 0;
}

// count twist-inequivalent conjugates of chi:
// for every m in [0,h) coprime to the order h of chi, count the entries
// of tr->allchi whose exponent vector equals that of chi^m
static int galois_conjugates(trace_t *tr,int c) {
	int order = 1,count = 0;
	int m,j,other,pw[nPmax];

	// the order of chi is the lcm of its local orders
	for (j=0;j<tr->nP;j++)
		order = lcm(order,
			ord_chi_p(tr->allchi[c].a[j],tr->P[j],tr->phipe[j]));

	for (m=0;m<order;m++) {
		if (gcd(m,order) != 1)
			continue;
		chi_power(pw,tr,c,m);

		// look chi^m up in the character table
		for (other=0;other<tr->nchar;other++) {
			j = 0;
			while (j < tr->nP && tr->allchi[other].a[j] == pw[j])
				j++;
			if (j == tr->nP)
				count++;
		}
	}
	return count;
}

// determine whether character c could admit a Galois rep;
// returns 1 if admissible, 0 otherwise
static int galois_admissible(trace_t *tr,int c) {
	int j,ord,seen = 0;	// bit h of seen is set once local order h occurs

	// when e=1 or s, \chi_p must have order in {2,3,4,5}
	for (j=0;j<tr->nP;j++) {
		if (tr->e[j] != 1 && tr->e[j] != tr->allchi[c].s[j])
			continue;
		ord = ord_chi_p(tr->allchi[c].a[j],tr->P[j],tr->phipe[j]);
		if (ord < 2 || ord > 5)
			return 0;
		seen |= (1<<ord);
	}

	// cannot have both 4 and 5 as orders
	if ((seen & (1<<4)) && (seen & (1<<5)))
		return 0;
	return 1;
}

#define nforks 64
int main(int argc,char *argv[]) {
	int i,j,N,N1,N2,c,c1,c2,parity,gs,maxgs,T,passed[2],conjugates,level;
	long l;
	int kforks=0,status;
	trace_t *tr;
	arb_t t,s[ntest];
	double q[2];

	if (argc < 2 || argc > 4) {
		printf("usage: %s support [N [chi]]\n",argv[0]);
		return 0;
	}

	sscanf(argv[1],"%d",&maxgs);
	if (maxgs < 1 || maxgs > 20) {
		printf("support must be an integer in [1,20]\n");
		return 0;
	}
	maxgs <<= 1;

	T = (int)(2*coshl((long double)maxgs/2));
	factor_table(Nmax < T+2 ? T+2 : Nmax);
	sqfm_table(T); sqfp_table(T);

	N1 = 1, N2 = Nmax-1;
	if (argc > 2) {
		sscanf(argv[2],"%d",&N);
		if ( !(N > 0 && N < Nmax) ) {
			printf("must have 0 < N < %d\n",Nmax);
			return 0;
		}
		N1 = N2 = N;

		if (argc > 3) {
			sscanf(argv[3],"%d",&c);
			parity = (c & 1);
			c >>= 1;
		}
	}

	arb_init(delta);
	arb_init(deltainv);
	arb_init(t);
	arb_init(zero);
	arb_init(one); arb_one(one);
	arb_init(negone); arb_neg(negone,one);
	arb_init(zeroone); arb_union(zeroone,zero,one,prec);
	arb_init(onetwo); arb_add_ui(onetwo,zeroone,1,prec);
	arb_init(ln2); arb_const_log2(ln2,prec);
	arb_init(logpi); arb_const_pi(t,prec); arb_log(logpi,t,prec);
	arb_init(euler); arb_const_euler(euler,prec);
	for (i=0;i<ntest;i++) arb_init(s[i]);

	if (argc <= 3)
		for (gs=2;gs<=maxgs;gs+=2) {
			gsupp = gs;
			arb_set_ui(delta,gsupp);
			arb_div_ui(delta,delta,2*points,prec);
			arb_set_ui(deltainv,2*points);
			arb_div_ui(deltainv,deltainv,gsupp,prec);
			recordint(s,cst,one);
			recordint(s,id,one);
			recordint(s,par1,one);
			recordint(s,par2,one);
			recordint(s,e1,one);
			recordint(s,e2,one);
			recordint(s,gint,one);
		}

	for (N=N1;N<=N2;trace_clear(tr),N++) {
		tr = trace_init(N);
		c1 = 0, c2 = tr->nchar-1;
		if (argc > 3) {
			if (c < c1 || c > c2) {
				printf("must have %d <= chi <= %d\n",c1,2*c2+1);
				return 0;
			}
			c1 = c2 = c;
		}
		for (c=c1;c<=c2;c++) {
#ifdef GAMMA_ONLY
			if ((l=square_multiple(tr,c)) >= Nmax) continue;
			level = (int)sqrt((double)l);
#endif

#ifdef GALOIS_ONLY
			if (!galois_admissible(tr,c)) continue;
#endif

			if (kforks == nforks) wait(&status); else kforks++;
			if (!fork()) {
				gs = 2;
				passed[0] = passed[1] = 0;
				if (argc > 3)
					gs = maxgs, passed[!parity] = 1;
				for (;gs<=maxgs&&!(passed[0]&&passed[1]);gs+=2) {
					trace(tr,c,gs);
					for (j=0;j<2;j++)
						if (!passed[j]) {
							for (i=0;i<ntest;i++) {
								(j ? arb_sub : arb_add)(s[i],tr->s1[i],tr->s2[i],prec);
								arb_mul_2exp_si(s[i],s[i],-1);
							}
							if (!(q[j]=minimize(s)))
								passed[j] = 1;
						}
				}
				for (j=0;j<2;j++)
					if (!passed[j]) {
#ifdef GAMMA_ONLY
						printf("Gamma(%d): ",level);
#endif
						printf("N=%d, chi=%d (",N,2*c+j);
						for (i=0;i<tr->nP;i++)
							printf("%s%d:%d:%d",i?",":"",tr->chi->a[i],tr->chi->s[i],
								ord_chi_p(tr->chi->a[i],tr->P[i],tr->phipe[i]));
						printf("), q=%.14g\n",q[j]);
						fflush(stdout);
					}
				return 0;
			}
		}
	}
	while (wait(&status) > 0);
	return 0;
}
