# script for analysis of "troonredes" data
# Hugo Quené, h dot quene at uu dot nl, 2013.03.28
# 

# read input data
troonredes4 <- read.table( file=url("http://www.let.uu.nl/~Hugo.Quene/personal/troonredes/troonredes4.20130328.txt"), header=TRUE )

# create filter vector for outliers in phrase length
require(parody)
calout.detect(troonredes4$nsyl,method="medmad") # calibrated outlier detection
# $outlier.region
# [1] -11.51719  27.51719
ok <- troonredes4$nsyl<28
table(ok)
# FALSE  TRUE 
#   36  4111 

require(lme4) # for mixed-effects modeling

# model (1)
# line 1: dependent variable, artic.rate = 1/{average syll duration}
# line 2: fixed part
# line 3: random part, all fixed terms also in random part
#         as per Barr et al. (2013), J Mem Lang 68, 255-278.
# line 4: additional parameters
## Build up the model by small incremental steps...
## to avoid estimation problems, and because we need lmer30 later.
## I(x) forces computation of (x) before entering (x) into model.
## Argument REML=F is important.
lmer30 <- lmer( 1/asd ~ 
	1+
	(1|jaar), 
	data=troonredes4, subset=ok, REML=F ) # empty model with intercepts only
## in the (2nd) `formula=` argument of update, the dot works as a wildcard 
## matching the unspecified parts of the regression formula.  
lmer31 <- update( lmer30, 
	.~. +I(period3m-3)+I((period3m)^2)+I(log(nsyl)-log(8)) ) # more fixed terms
model1.31b <- update( lmer31, 
 	.~. -(1|jaar) + (1+I(period3m-3)+I((period3m-3)^2) +I(log(nsyl)-log(8))|jaar) ) # more random terms

# model (2)
# random part is same as in model (1), 
# i.e. all fixed terms NOT involving year also in random part
# jaarc is centered year of speech
model2.34b <- update( model1.31b, 
	.~. + jaarc + I(jaarc^2) + I(period3m-3):jaarc + 
    jaarc:I(jaarc^2) + I(log(nsyl)-log(8)):jaarc + 
    I(log(nsyl)-log(8)):I(jaarc^2) + I(log(nsyl)-log(8)):jaarc:I(jaarc^2) )
    
# print model 1
print(model1.31b, cor=F) # print model1
VarCorr(model1.31b) # print var-cov matrix of model1
# print model 2
print(model2.34b, cor=F) # print model2
VarCorr(model2.34b) # print var-cov matrix of model2
require(LMERConvenienceFunctions)
pamer.fnc(model2.34b) # p values of fixed terms

# compare models in Likelihood Ratio Test. 
# this LRT requires REML=F in lmer call, as per 
# Pinheiro & Bates (2000). Mixed-effects models in S and S-Plus. New York: Springer.
# section 2.4.1.
anova(model1.31b,model2.34b)

# proportional reduction of unexplained variance at level 1 (within years)
# Snijders & Bosker, p.102
(.03155+.00224+.00001+.02052+.5545) # explained var of model 1, level 1
# [1] 0.60882
(.02829+.00094+.00000+.00943+.5544) # explained var of model 2, level 1
# [1] 0.59306
1 - (0.59306/0.60882) # proportional reduction of unexplained var, level 1
# [1] 0.02588614

# proportional reduction of unexplained variance at level 2 (between years)
# Snijders & Bosker, p.103
# find representative (average) nr of fragments per year
with(troonredes4, tapply(1/asd[ok],jaar[ok],length)) -> aux
mean(aux) # 457
rm(aux) # cleanup
(.03155+.00224+.00001+.02052+.5545/457) # explained var of model 1, level 2
# [1] 0.05553335
(.02829+.00094+.00000+.00943+.5544/457) # explained var of model 2, level 2
# [1] 0.03987313
1 - (.03987313/0.05553335) # proportional reduction of unexplained var, level 2
# [1] 0.2819967

# done! 
# for plots, see separate R scripts