# some handy functions for R and S-Plus 
# Hugo QuenŽ, h.quene@uu.nl, www.hugoquene.nl
# 2006.11.10	first collected
# 2006.12.12	function bonferroni.adjusted added
# 2007.05.30	call with source(file=url(...)) added
# 2007.11.19	function nPVI added
# 2008.09.30	function same added
# 2009.03.02	function inside added
# 2009.08.24	some functions are only defined if !exists(function)
#				this applies to functions sd, se, logit, antilogit

#
# Steps to use these functions: 
# EASY METHOD
# 	R> source(file=url("http://www.let.uu.nl/~Hugo.Quene/personal/tools/hqfunctions.r"))
#
# HARD METHOD
# 1. Save this file to a place on your computer, e.g. to 
#	E:\\mypath\\hqfunctions.r
# 2. From within R, execute commands given in the saved file,
#	by calling
#	R> source(file="e:\\mypath\\hqfunctions.r")
#	This reads commands from the indicated file instead
#	of from the standard input (keyboard). 
#
# Now the functions are defined and may be used, e.g.
#	R> y.trim <- trim(y,.05)
# Good luck! 


trim <- function(x,p) 
{
# first NAs are removed from x;
# then fraction p is removed from x at BOTH upper AND lower end of range of x
# NOTE: as a side effect, argument x is sorted ! 
	x <- x[!is.na(x)]; x<-sort(x); n<-length(x); m<-round(n*p);
	return(x[(m+1):(n-m)]) 
}


howmany.na <- function(x)
{
# returns the number of missing observations in x
	return( length(which.na(x)) ) 
}


summary.long <- function(x, trim = 0.1, geo = F, ci=.95, na.rm = T) 
{ 
# Sherry LaMonica, insightful.com
# contributed to S-Plus discussion list, item #45,
# http://spluscom.insightful.com/discussions/discussion3.cfm?topic_id=10&dept_id=6&discussionitemID=35&parentitemid=0#45
# se added 20021206 HQ
# CI added 20060210 HQ
	dn <- list(c("Sample Size", "# Missing Values", "Non-missing Sample Size", "Mean", "Median", paste(trunc(trim * 100), "% Trimmed Mean", sep = ""), "Skew", "Kurtosis", "Min", "Max", "Range", "1st Quartile", "3rd Quartile", "IQR", "Standard Deviation", "Standard Error", paste( as.character(ci*100),"% Conf.Int. Lower Bound", sep=""), paste( as.character(ci*100),"% Conf.Int. Upper Bound", sep=""), "Median Absolute Deviation", "CV"), "Statistics")
	if(geo) { 
		dn[[1]] <- append(dn[[1]], "Geometric Mean", after = 6) 
	} 
	if(length(x) == 0) { 
		sum.vec <- rep(NA, ifelse(geo, 18+2, 17+2)) 
		warning("x is an empty object") 
	} 
	else { 
		s <- summary(x) 
		n.miss <- sum(is.na(x)) 
		n <- length(x) 
		n.w.no.miss <- n - n.miss 
		if(na.rm) x <- x[!is.na(x)] 
		if(!na.rm & n.miss != 0) 
			v <- NA 
		else v <- var(x) 
		v.mle <- ((n.w.no.miss - 1)/n.w.no.miss) * v 
		sd <- sqrt(v) 
		se <- sd / sqrt(n.w.no.miss) 
		# two-sided testing
		crit <- qt( 1-((1-ci)/2), (n.w.no.miss-1) )
		lowerbound <- s[4]-crit*se
		upperbound <- s[4]+crit*se
		skew <- mean((x - s[4])^3)/(v.mle^1.5) 
		kurtosis <- (mean((x - s[4])^4)/(v.mle^2)) - 3 
		sum.vec <- c(n, n.miss, n.w.no.miss, s[4], s[3], mean(x, trim = +trim, na.rm = na.rm), skew, kurtosis, s[1], s[6], diff(range(x, na.rm = na.rm)), s[2], s[5], s[5] - s[2], sd, se, lowerbound, upperbound, mad(x), sd/s[4]) 
		if(geo) { 
			if(any(x < 0)) stop("Negative values in vector; cannot compute +geometric mean") 
			sum.vec <- append(sum.vec, geo.mean(x, na.rm = na.rm), after = 6)
		} 
	} 
	mat <- matrix(sum.vec, ncol = 1) 
	dimnames(mat) <- dn 
	return(mat)
} 


##  Define a substitute function if necessary:
if(!exists("sd", mode="function"))
	sd <- function(y,...) 
	{
	# compute and return standard deviation
		return ( sqrt(var(y,...)) ) 
	}


valid.n <- function(x)
{
	return( length(x)-length(which(is.na(x))) )
}


##  Define a substitute function if necessary:
if(!exists("se", mode="function"))
	se <- function(y,...) 
	{
	# compute and return standard error (of the mean) 
		return ( sd(y,...) / sqrt(valid.n(y)) ) 
	}


ebplot <- function (x,y,conf=.95,pch=15,col="darkgrey",ww=.10,...) 
{ 
# add average point and error bar to current plot
# Hugo Quené, hugo.quene@let.uu.nl, November 2006
# 20070618 	added ... placeholder
# 
# x    is x coordinate of point and bar
# y    is whole vector, compute average and error bar in function
# conf is width of confidence interval
# col  is color for plot symbol (pch=15) at average
# ww   is width (expressed in units of x dimension) of whiskers on error bar
# ...  is placeholder for other (graphical) parameters and arguments, e.g. bg="COLOR"
        # remove extremes and NAs in y
        y <- trim(y,0.005)
        # determine critical t value, RE conf and df
        if (length(y)<=1) { crit <- 0 }
        else {
	        crit <- qt( 1-((1-conf)/2), df=(length(y)-1) )
	        }
        # bar could also be defined as 2*sd(y), etc.
        # here, function se (see above) is called to compute standard error of y
        bar <- crit*se(y,na.rm=T) 
        # error bar is in default color (usually black)
        lines(x=rep(x,2), y=c(mean(y)-bar,mean(y)+bar), lwd=1.5 )
        lines(x=c(x-ww,x+ww), y=rep(mean(y)-bar,2), lwd=1.5 ) # lower whisker
        lines(x=c(x-ww,x+ww), y=rep(mean(y)+bar,2), lwd=1.5 ) # upper whisker
        # plot point after error bar, so that it overlaps the bar, 20070618
        # plot point is enlarged, in specified color
        points(x, mean(y), pch=pch, cex=2,  col=col, ... )
}


##  Define a substitute function if necessary:
if(!exists("logit", mode="function"))
	logit <- function(p) 
	{
	# compute and return logit of p;
	# if p=.5 then logit==0 else sign(logit)==(p>.5)
		return( log(p/(1-p)) )
	}

##  Define a substitute function if necessary:
if(!exists("antilogit", mode="function"))
	antilogit <- function(x) 
	{ 
	# compute and return antilogit of x;
	# this returns a proportion p for which logit(p)==x;
		return( exp(x)/(1+exp(x)) )
	}


resample <- function(x, size, ...)
{
# safe resample, with replacement, for bootstrap
# after example in help(sample)
       if(length(x) <= 1) { if(!missing(size) && size == 0) x[FALSE] else x
       } else sample(x, size, replace=T, ...)
}


bonferroni.adjusted <- function( alpha=.05, k=3 ) 
{
# apply bonferroni adjustement to alpha level,
# see http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1112991
# alpha is nominal alpha level, joint probability of Type I error over all comparisons
# k     is number of posthoc comparisons
	return( 1-(1-alpha)^(1/k) )
}


ci <- function( m, b, c=qnorm(1-.05/2) )
{
# report confidence interval, using critical Z score
# m is mean of data
# b is variability measure of data, such as standard error of mean
# c is critical value, default z=1.96 for 95%CI, or enter exact t value
	cat("interval (",m-c*b,m+c*b,")\n") 
}


# some aliases that I find easier to remember.
pwd <- getwd()
chdir <- function(...) { getwd(...) }


nPVI <- function ( dur=c(50,100,50,75,125) )
{
# calculate nPVI, normalized Pairwise Variability Index,
# see Grabe & Low 2002, Proc LabPhon 7
# HQ 20071119
	x <- diff(dur)
	ub <- length(raw) # upper bound of difference
	for (k in 1:ub)
		x[k] <- abs ( x[k]*2 / (dur[k]+dur[k+1]) )
	return( (100/ub)*sum(x) )
}


same <- function( x, k=3 )
{ 
# test whether k consecutive elements (triplets, quartets, etc) of x are identical;
# the return value is an integer vector of the same length as x.
# if all k values are NA, then the return value is FALSE not TRUE. 
# the return values are integers with meaning 0=false, 1=true.
# HQ 20080929
	# initialize
	if (length(x) %% k > 0)		# if remainder after division > 0
		warning( paste("length of data is not a multiple of",k) )
	maxi <- length(x) %/% k		# integer divide
	result <- as.integer( rep(NA,length(x))	)
	# loop
	for (i in 1:maxi) {
		values <- unique( x[ ((i-1)*k+1) : ((i-1)*k+k)] ) # unique values of k consecutive elements
		curr <- (length(values)==1) && ((NA %in% values)==FALSE)	# test identity, NA excluded
		result[ ((i-1)*k+1) : ((i-1)*k+k) ] <- as.integer(curr) 
		}
	return(result)
}

inside <- function ( ll=0,x,ul=1 ) 
# HQ 20090901 defaults added
{ 
# test whether x lies inside interval spanning from ll to ul (inclusive)
# HQ 20090228
	return( ll<=x & x<=ul ) 
}