# the code requires the prior installation of the following R packages:
# wavethresh, mvtnorm
# 
# the main routine is: msml.bd

library(wavethresh)
library(mvtnorm)

msml.bd<-function(x, wavelet=c(1, "DaubExPhase"), scales=0, tau=-1, pp.tau=-1, del=-1, lam=-1, ep=-1){

# returns the estimates the variance function h.
#
# takes;
#
# x: data
# wavelet: choice of wavelet vectors used to compute wavelet periodograms, see package wavethresh
# scales: if 0, scales are chosen as described in the paper 
# tau: if -1, tau is chosen automatically (of the same length with scales vector)
# del: Delta_T, if -1, del is chosen automatically
# lam: Lamda_T for across-scales post-processing, if -1, lam is chosen automatically
# ep: for the correction of the bias caused by the computation of wavelet periodograms
#      if -1, ep is chosen automatically
#
# returns;
# br.list: list of detected breakpoints from each scale, from finer to coarser scales
# breakpoints: vector containing detected breakpoints  
#	after cross-scale post-processing
# estimates: list of estimated beta_i(t/T), from finer to coarser scales
	
	n<-length(x); J<-log(n, 2); criterion<-n^.251*sqrt(log(n))
	if(tau==-1){
		tau.mat<-get.tau(n, wavelet=wavelet, q=c(.95, .975))
		taus<-tau.mat[1,]
		pp.taus<-tau.mat[2,]
	} else{
		pp.taus<-taus<-tau
	}
	if(scales==0){
		scales<-J-1:floor(log(n, 2)/3)
		max.scale<-min(scales, J-floor(J/2)+1)
	} else{
		scales<-sort(J+scales, decreasing=T)
		max.scale<-min(scales)
	}
	ews<-ewspec(x, filter.number=wavelet[1], family=wavelet[2], WPsmooth=FALSE)
	WP<-ews$WavPer

	estimates<-br.list<-NULL
	for(i in 1:length(scales)){
		l<-scales[i]
		z<-accessD(WP, level=l)
		dis<-c((n-n/2^l+1):n); nz<-n-length(dis)  
		tau<-taus[i]; pp.tau<-pp.taus[i]
		u<-uh.bs(z[-dis], n=n, tau=tau, del=del, scale=l, ep=ep)
		br<-NULL
		if(length(u$breakpoints)>0){
			pp<-within.post.processing(z[-dis], n=n, u$breakpoints, pp.tau)$pp
			nc<-ncol(pp); br<-(pp[,1])[pp[,nc]>pp.tau]
		}
		br.list<-c(br.list, list(br))
		est<-u$est; est<-c(est, est[(nz-length(dis)+1):nz])
		estimates<-c(estimates, list(est))
	}
	br<-across.post.processing(rev(br.list), n, lam)

	l<-max(min(scales)-1, max.scale)
	while(l>=max.scale){
		i<-i+1
		z<-accessD(WP, leve=l)
		dis<-c((n-n/2^l+1):n) 
		nz<-n-length(dis)
		if(ep==-1) ep<-max(2*n/2^l, ceiling(sqrt(n)/2))
		nb<-length(br); ebr<-c(0, br, nz)
		tau<-taus[i]; pp.tau<-pp.taus[i]
		ind<-0
		for(k in 1:(nb+1)){
			s<-ebr[k]+ep; e<-ebr[k+1]-ep
			if(s+2<e){
				d<-max(abs(inner.prod.iter(z[s:e])))
				m<-mean(z[s:e])
				if(d/m>tau*criterion) ind<-1
			}
		}
		if(ind==1){
			tau<-taus[i]; pp.tau<-pp.taus[i]
			u<-uh.bs(z[-dis], n=n, tau=tau, del=del, scale=l, ep=ep)
			br<-NULL
			if(length(u$breakpoints)>0){
				pp<-within.post.processing(z[-dis], n=n, u$breakpoints, pp.tau)$pp
				nc<-ncol(pp); br<-(pp[,1])[pp[,nc]>pp.tau]
			}
			br.list<-c(br.list, list(br))
			est<-u$est; est<-c(est, est[(nz-length(dis)+1):nz])
			estimates<-c(estimates, list(est))
			br.next<-across.post.processing(rev(br.list), n, lam)
			if(identical(br, br.next)) break
			br<-br.next
		} else{
			break
		}
		l<-l-1
	}
	list(br.list=br.list, breakpoints=br, estimates=estimates, taus=taus, pp.taus=pp.taus)	

}



uh.bs<-function(z, n, tau, del, ep=-1, scale=-1){

# performs the breakpoint detection based on the binary segmentation procedure for each sequence

	nz<-length(z)
	if(ep==-1){
		ep<-round(max(2*n/2^scale, ceiling(sqrt(n)/2)))
	}
	hep<-round(ep/2)	
	if(del<0){
		del<-round(max(2*ep+2, ceiling(log(n)*sqrt(n)/4)))
	}
	if(nz<del) stop("Input vector too short")
	criterion<-tau*n^(0.251)*sqrt(log(n))
	
	breakpoints<-NULL
	f<-NULL
	
	tree<-list(matrix(0, 6, 1))

	tree[[1]][1,1]<-1
	s<-tree[[1]][4,1]<-1
	e<-tree[[1]][6,1]<-nz
	tree[[1]][5,1]<-b<-max.inner.prod(z[(s+hep):(e-hep)])+hep
	d<-sqrt((e-b)/(e-s+1)/(b-s+1))*sum(z[s:b])-sqrt((b-s+1)/(e-s+1)/(e-b))*sum(z[(b+1):e]) 
	m<-mean(z[s:e])	

	if(abs(d)/m>criterion){
		tree[[1]][2,1]<-d
		tree[[1]][3,1]<-m
		breakpoints<-c(breakpoints, b)
		f<-c(f, abs(d)/m)

	j<-1
	
	while(length(tree)==j){
		if(sum(tree[[j]][6, ]-tree[[j]][4, ]-rep(2*del, dim(tree[[j]])[2]))>0){
			
			no.parent.coeffs<-dim(tree[[j]])[2]
			no.child.coeffs<-0
			for(i in 1:no.parent.coeffs){
				if(tree[[j]][5, i]-tree[[j]][4, i]>del+1){
					s<-tree[[j]][4, i]
					e<-tree[[j]][5, i]
					ind.max<-max.inner.prod(z[(s+hep):(e-ep)])+hep
					b<-s+ind.max-1
					d<-max(abs(inner.prod.iter(z[(s+hep):(e-ep)])))
					m<-mean(z[(s+hep):(e-ep)])

					if(abs(d)/m>criterion){
						if(length(tree)==j) tree<-c(tree, list(matrix(0, 6, 0)))
						no.child.coeffs<-no.child.coeffs+1
						tree[[j+1]]<-matrix(c(tree[[j+1]], matrix(0, 6, 1)), 6, no.child.coeffs)
						tree[[j+1]][1, no.child.coeffs]<-2*tree[[j]][1, i]-1
						tree[[j+1]][2, no.child.coeffs]<-d<-sqrt((e-b)/(e-s+1)/(b-s+1))*sum(z[s:b])-sqrt((b-s+1)/(e-s+1)/(e-b))*sum(z[(b+1):e]) 
						tree[[j+1]][3, no.child.coeffs]<-m<-mean(z[s:e])
						tree[[j+1]][4, no.child.coeffs]<-s
						tree[[j+1]][6, no.child.coeffs]<-e
						tree[[j+1]][5, no.child.coeffs]<-b
						breakpoints<-c(breakpoints, tree[[j+1]][5, no.child.coeffs])
						f<-c(f, abs(d)/m)
					}
				}
				if(tree[[j]][6, i]-tree[[j]][5, i]>max(2*ep, del)+1){
					s<-tree[[j]][5, i]+1
					e<-tree[[j]][6, i]
					ind.max<-max.inner.prod(z[(s+ep):(e-hep)])+ep
					b<-s+ind.max-1
					d<-max(abs(inner.prod.iter(z[(s+ep):(e-hep)])))
					m<-mean(z[(s+ep):(e-hep)])

					if(abs(d)/m>criterion){
						if(length(tree)==j) tree<-c(tree, list(matrix(0, 6, 0)))
						no.child.coeffs<-no.child.coeffs+1
						tree[[j+1]]<-matrix(c(tree[[j+1]], matrix(0, 6, 1)), 6, no.child.coeffs)
						tree[[j+1]][1, no.child.coeffs]<-2*tree[[j]][1, i]
						tree[[j+1]][2, no.child.coeffs]<-d<-sqrt((e-b)/(e-s+1)/(b-s+1))*sum(z[s:b])-sqrt((b-s+1)/(e-s+1)/(e-b))*sum(z[(b+1):e])
						tree[[j+1]][3, no.child.coeffs]<-m<-mean(z[s:e])
						tree[[j+1]][4, no.child.coeffs]<-s
						tree[[j+1]][6, no.child.coeffs]<-e
						tree[[j+1]][5, no.child.coeffs]<-b
						breakpoints<-c(breakpoints, tree[[j+1]][5, no.child.coeffs])
						f<-c(f, abs(d)/m)
					}
				}
			}

		}
		j<-j+1
	}}

	B<-length(breakpoints)
	res<-NULL; rest<-breakpoints
	if(B>1){
		br<-rest[1]; res<-c(res, br)
		rest<-rest[-1]; R<-length(rest)
		while(R>0){
			if(min(abs(br-rest))<del){
				k<-which(abs(br-rest)<del)
				rest<-rest[-k]
			}
			if(length(rest)==0) break
			br<-rest[1]; res<-c(res, br)
			rest<-rest[-1]; R<-length(rest)
		}
		breakpoints<-res
	}

	rec<-rep(0, nz)
	B<-length(breakpoints)
	if(B>0){
		temp.br<-c(0, sort(breakpoints), nz)
		for(i in 1:(B+1)){
			s<-temp.br[i]+1
			e<-temp.br[i+1]
			rec[s:e]<-rec[s:e]+mean(z[s:e])
		}
	} else{
		rec<-rep(mean(z), nz)
	}
	
	list(tree=tree, est=rec, breakpoints=breakpoints, f=f)	

}

within.post.processing<-function(z, n, br=breakpoints, tau){ 

# performs the within-scale post-processing 

	nz<-length(z)
	criterion<-n^(.251)*sqrt(log(n))
	sbr<-fbr<-sort(br, decreasing=F)
	B<-L<-length(br)
	pp<-NULL
	
	pp<-matrix(fbr, B, 1)

	while(L>0){
		cbr<-c(0, fbr, nz)
		temp<-rep(0, B)
		for(i in 1:L){
			b<-cbr[i+1]; s<-cbr[i]+1; e<-cbr[i+2]
			v<-abs(sqrt((e-b)/(e-s+1)/(b-s+1))*sum(z[s:b])-sqrt((b-s+1)/(e-s+1)/(e-b))*sum(z[(b+1):e]))
			v<-v/mean(z[s:e])/criterion
			temp[sbr==b]<-v
		}
		pp<-cbind(pp, temp); fbr<-sbr[temp>tau]
		if(length(fbr)==L){ 
			break 
		} else{
			L<-length(fbr)
		}
	}

	list(pp=pp)

}

across.post.processing<-function(breaks, n, lam){ 

# performs the across-scales post-processing 

	l<-length(breaks)
	dif<-num<-B<-NULL
	for(i in 1:l){
		num<-c(num, length(breaks[[i]]))
		if(num[i]!=0){
			for(p in 1:num[i]){
				B<-rbind(B, c(i, breaks[[i]][p]))
			}
			if(num[i]>1){
				dif<-c(dif, min(abs(diff(breaks[[i]]))))
			} else{
				dif<-c(dif, Inf)
			}
		}
	}
	if(sum(num>0)==0){ 
		breakpoints<-NULL
	} 
	else{
		if(sum(num>0)==1){ 
			breakpoints<-breaks[[which(num!=0)]]
		} else{
			batch<-unique(sort(B[,2]))
			len<-length(batch)
			if(lam==-1) lam<-round(min((dif-2), ceiling(log(n)*sqrt(n)/2.5)))
			g<-1; t<-min(batch); G<-NULL
			while(t<n){
				gr<-batch[batch<=t+lam]
				t<-max(gr); ind<-(B[,1])[which(is.element(B[,2], gr))]
				batch<-batch[!is.element(batch, gr)]
				temp<-batch[batch<=t+lam]
				ind.temp<-intersect((B[,1])[which(is.element(B[,2], temp))], ind)
				while(length(temp)>0 && length(ind.temp)<0){
					gr<-c(gr, batch[batch<=t+lam])
					t<-max(gr)
					batch<-batch[!is.element(batch, gr)]; ind<-c(ind, ind.temp)
					temp<-batch[batch<=t+lam]
					ind.temp<-intersect((B[,1])[which(is.element(B[,2], temp))], ind)
				}
				G<-rbind(G, cbind(g, gr))
				g<-g+1; t<-min(batch, n)
			}
			g<-max(G[,1])
			i0<-max(which(num==max(num)))
			if(identical(as.integer(G[is.element(G[,2],B[B[,1]==i0,2]),1]),1:g)){
				breakpoints<-breaks[[i0]]
			} else{
				breakpoints<-NULL
				for(i in 1:g){
					batch<-G[G[,1]==i,2]
					if(length(batch)==1){
						br<-batch[1]
					} else{
						j<-max(B[is.element(B[,2], batch),1])
						br<-(breaks[[j]])[is.element(breaks[[j]], batch)]
					}
					breakpoints<-c(breakpoints, br)
				}
			}
		}
		breakpoints<-sort(breakpoints)
	}
	return(breakpoints)
}

get.tau<-function(n, q=.95, r=100, scales=NULL, wavelet=c(1, "DaubExPhase")){

# produces tau as described in Section 3.4

	J<-round(log(n, 2))
	if(is.null(scales)) scales<-J-1:floor(log(n, 2)/2)
	max.scale<-min(scales, J-floor(J/2)+1)
	M<-NULL
	for(i in 1:r){
		x<-rnorm(n)
		ews<-ewspec(x, filter.number=1, family="DaubExPhase", WPsmooth=FALSE)
		WP<-ews$WavPer
		m<-NULL
		for(l in scales[1]:max.scale){
			z<-accessD(WP, leve=l)
			dis<-c((n-n/2^l+1):n)  
			z<-z[-dis]; nz<-length(z)
			m<-c(m, max(abs(inner.prod.iter(z)))/mean(z)/(n^.251*sqrt(log(n))))
		}
		M<-rbind(M, m)
	}
	Sigma<-matrix(0, n, n)
	for(i in 1:n){ for(j in 1:n){
		Sigma[i,j]<-.3^abs(i-j)
	}}
	X<-rmvnorm(r, sigma=Sigma)
	for(i in 1:r){
		x<-X[i,]
		ews<-ewspec(x, filter.number=1, family="DaubExPhase", WPsmooth=FALSE)
		WP<-ews$WavPer
		m<-NULL
		for(l in scales[1]:max.scale){
			z<-accessD(WP, leve=l)
			dis<-c((n-n/2^l+1):n)  
			z<-z[-dis]; nz<-length(z)
			m<-c(m, max(abs(inner.prod.iter(z)))/mean(z)/(n^.251*sqrt(log(n))))
		}
		M<-rbind(M, m)
	}
	for(i in 1:n){ for(j in 1:n){
		Sigma[i,j]<-.6^abs(i-j)
	}}
	X<-rmvnorm(r, sigma=Sigma)
	for(i in 1:r){
		x<-X[i,]
		ews<-ewspec(x, filter.number=1, family="DaubExPhase", WPsmooth=FALSE)
		WP<-ews$WavPer
		m<-NULL
		for(l in scales[1]:max.scale){
			z<-accessD(WP, leve=l)
			dis<-c((n-n/2^l+1):n)  
			z<-z[-dis]; nz<-length(z)
			m<-c(m, max(abs(inner.prod.iter(z)))/mean(z)/(n^.251*sqrt(log(n))))
		}
		M<-rbind(M, m)
	}
	for(i in 1:n){ for(j in 1:n){
		Sigma[i,j]<-.9^abs(i-j)
	}}
	X<-rmvnorm(r, sigma=Sigma)
	for(i in 1:r){
		x<-X[i,]
		ews<-ewspec(x, filter.number=1, family="DaubExPhase", WPsmooth=FALSE)
		WP<-ews$WavPer
		m<-NULL
		for(l in scales[1]:max.scale){
			z<-accessD(WP, leve=l)
			dis<-c((n-n/2^l+1):n)  
			z<-z[-dis]; nz<-length(z)
			m<-c(m, max(abs(inner.prod.iter(z)))/mean(z)/(n^.251*sqrt(log(n))))
		}
		M<-rbind(M, m)
	}
	return(apply(M, 2, function(x){quantile(x, q)}))
}

inner.prod.iter.fisz<-function(x){

	n<-length(x)
	I.sd<-I.plus<-I.minus<-rep(0, n-1)
	I.plus[1]<-sqrt(1-1/n)*x[1]
	I.minus[1]<-1/sqrt(n^2-n)*sum(x[2:n])
	I.sd[1]<-sqrt(I.plus[1]^2/1+I.minus[1]^2/(n-1))

	if(n-2){
		for(m in 1:(n-2)){
			factor<-sqrt((n-m-1)*m/(m+1)/(n-m))
			I.plus[m+1]<-I.plus[m]*factor+x[m+1]*sqrt(1/(m+1)-1/n)
			I.minus[m+1]<-I.minus[m]/factor-x[m+1]/sqrt(n^2/(m+1)-n)
			I.sd[m+1]<-sqrt(I.plus[m+1]^2/(m+1)+I.minus[m+1]^2/(n-m-1))
		}
	}
	
	d<-I.plus-I.minus
	s<-I.plus+I.minus
	m<-mean(x)
	
	return(list(d=d, s=s, m=m))

}

max.inner.prod<-function(x){

	ipi<-inner.prod.iter(x)
	return(med(which(abs(ipi)==max(abs(ipi)))))

}

inner.prod.iter<-function(z){

	n<-length(z)
	I.plus<-I.minus<-I.prod<-rep(0, n-1)
	I.plus[1]<-sqrt(1-1/n)*z[1]
	I.minus[1]<-1/sqrt(n^2-n)*sum(z[2:n])
	if(n-2){ 
		for(m in 1:(n-2)){
			factor<-sqrt((n-m-1)*m/(m+1)/(n-m))
			I.plus[m+1]<-I.plus[m]*factor+z[m+1]*sqrt(1/(m+1)-1/n)
			I.minus[m+1]<-I.minus[m]/factor-z[m+1]/sqrt(n^2/(m +1)-n)
		}
	}
	I.prod<-I.plus-I.minus
	return(I.prod)

}

med<-function(x){

	y<-quantile(x, 0.5, type = 3)
	return(y[[1]])

}

unbal.haar.vector<-function(a){

	n<-a[3]-a[1]+1
	m<-a[2]-a[1]+1
	return(c(rep(sqrt(1/m-1/n), m), rep(-1/sqrt(n^2/m-n), n-m)))

}

max.inner.prod<-function(x){

	ipi<-inner.prod.iter(x)
	return(med(which(abs(ipi)==max(abs(ipi)))))

}

