### R code from vignette source 'KeshtiariVasishthDataAnalysis.Rnw'
### Encoding: UTF-8

###################################################
### code chunk number 1: KeshtiariVasishthDataAnalysis.Rnw:24-26
###################################################
## make results exactly replicable
set.seed(987654321)


###################################################
### code chunk number 2: KeshtiariVasishthDataAnalysis.Rnw:29-382
###################################################
se <- function(x) 
  {
		y <- x[!is.na(x)] # remove the missing values 
	sqrt(var(as.vector(y))/length(y))
}

critroi <- read.csv("Critregions/criticalregions.csv",header=F,as.is=T)

colnames(critroi) <- c("item","cond","position")

## also look at spillover region
spillover <- critroi
spillover$pos<- spillover$pos+1

critroi$crit <- "crit"

## 110 subjects' data:
data <- read.table("data110.txt",as.is=T)

colnames(data) <- c("subj","expt","item","condition","pos","word","response","rt")

data <- subset(data,expt=="Farsi")

## remove all flawed items:
#remitem<-c(7, 12, 14, 15, 17, 20, 22, 23, 24)

data.new<-subset(data,item!=7 & item!=12& item!=14 & item!=15 & item!=17 & item!=20 & item!=22 & item!=23 & item!=24) 

## checks out
length(unique(data.new$item))

data<-data.new

## table(data$rt<250)
#202 /(29607+202)*100=0.67765

## isolate question responses:
questions <- subset(data,pos=="?")
questions$response <- as.numeric(as.character(questions$response))
summary(questions)

round(with(questions,tapply(response,condition,mean))*100)

meansubjects <- with(questions,tapply(response,subj,mean))

#barplot(meansubjects)

## subj 67 has very low performance
#meansubjects

subjacc <- data.frame(subj=1:110,acc=meansubjects)

with(questions,tapply(-1000/rt,condition,mean))

## coding
pron <- ifelse(questions$condition%in%c("a","c"),-.5,.5)
rc <- ifelse(questions$condition%in%c("a","b"),-.5,.5)
inter <- ifelse(questions$condition%in%c("b","c"),-.5,.5)

questions$pron <- pron
questions$rc <- rc
questions$inter <- inter

library(lme4)
library(car)

questions<-subset(questions,subj!=67)

(fm0.logistic <- lmer(response~pron+rc+inter+(1|subj)+(1|item),
                      family=binomial(),questions))

## question response rts:
## use inverse:
#boxcox(rt~factor(pron)*factor(rc)*subj,data=questions)

#round(with(subset(questions,rt>738),tapply(rt,condition,mean)),digits=0)

(fm0.qrts <- lmer(-1000/rt~pron+rc+inter+(1|subj)+(1|item),
                      subset(questions,rt>735)))

#(fm0.qrts <- lmer(-1000/rt~pron+rc+inter+(1|subj)+(1|item),
#                      questions))

qqPlot(residuals(fm0.qrts))

data$pos <- as.numeric(as.character(data$pos))

data <- subset(data,pos!="NA")

data$pos <- data$pos+1

dim(critroi) ## 96 rows

dim(spillover)

## map function missing:
map <- function(vec, from, to) {
       newVec <- vec
       for( i in 1:length(from) ) {
               newVec[vec == from[i]] <- to[i]
       }
       return(newVec)
}

map(as.character(critroi$cond), c('A','B','C','D'), c('a','b','c','d')) -> critroi$cond

map(as.character(spillover$cond), c('A','B','C','D'), c('a','b','c','d')) -> spillover$cond

data$condition <- as.character(data$condition)

is.character(data$condition)
is.character(critroi$cond)
is.character(spillover$cond)

data2 <- merge(data,critroi,by.x=c("item","condition","pos"),by.y=c("item","cond","position"),sort=FALSE,all.x=F)

data2.spill <- merge(data,spillover,by.x=c("item","condition","pos"),
                 by.y=c("item","cond","pos"),sort=FALSE,all.x=F)

pron <- ifelse(data2$condition%in%c("a","c"),-.5,.5)
rc <- ifelse(data2$condition%in%c("a","b"),-.5,.5)
inter <- ifelse(data2$condition%in%c("b","c"),-.5,.5)

data2$pron <- pron
data2$rc <- rc
data2$inter <- inter

pron <- ifelse(data2.spill$condition%in%c("a","c"),-.5,.5)
rc <- ifelse(data2.spill$condition%in%c("a","b"),-.5,.5)
inter <- ifelse(data2.spill$condition%in%c("b","c"),-.5,.5)

data2.spill$pron <- pron
data2.spill$rc <- rc
data2.spill$inter <- inter

## look at the by subject data:
#library(lattice)
#xyplot(log(rt)~factor(condition)|factor(subj),data2)
## remove subject 60? keep all
#bwplot(log(rt)~factor(condition)|factor(subj),data2)

library(MASS)
                          
library(ggplot2)

crit<-data2[,c(1:5,8,10:12)]
crit$region<-factor("crit")
post.crit<-data2.spill[,c(1:5,8,10:12)]
post.crit$region<-factor("post.crit")

data.final<-rbind(crit,post.crit)

##nested contrasts:
## effect of pronoun in nonRC conditions:
c1<-ifelse(data.final$condition=="a",-0.5,ifelse(data.final$condition=="b",0.5,0))
## effect of pronoun in RC conditions:
c2<-ifelse(data.final$condition=="c",-0.5,ifelse(data.final$condition=="d",0.5,0))
## effect of RC:
c3<-ifelse(data.final$condition%in%c("a","b"),-0.5,0.5)

data.final$c1<-c1
data.final$c2<-c2
data.final$c3<-c3

## a: gap,  no.rel.cl
## b: pron, no.rel.cl
## c: gap,  rel.cl
## d: pron, rel.cl

pronoun<-ifelse(data.final$condition%in%c("a","c"),"gap","pron")
rel.cl<-ifelse(data.final$condition%in%c("a","b"),"no.rel.cl","rel.cl")

data.final$pronoun<-factor(pronoun)
data.final$rel.cl<-factor(rel.cl)

## 110 subjects
length(unique(data.final$subj))

## 15 items because flawed items removed:
length(unique(data.final$item))

## inverse is OK:
#boxcox(rt~factor(pron)*factor(rc)*subj,data=subset(data.final,region=="crit"))

#boxcox(rt~factor(pron)*factor(rc)*subj,data=subset(data.final,region=="post.crit"))

(m.crit0<-lmer(-1000/rt~pron+rc+inter+(1|subj)+(1|item),subset(data.final,region=="crit")))

qqPlot(residuals(m.crit0))

## .7% of the data removed
#2610-2592
#18/2610
#0.006896552

(m.crit<-lmer(-1000/rt~pron+rc+inter+(1+pron+rc+inter|subj)+(1|item),subset(data.final,region=="crit" & rt>250)))

qqPlot(residuals(m.crit))


(m.crit.raw<-lmer(rt~pron+rc+inter+(1|subj)+(1|item),subset(data.final,region=="crit" & rt>250)))

(m.crit.log<-lmer(log(rt)~pron+rc+inter+(1|subj)+(1|item),subset(data.final,region=="crit" & rt>250)))

(m.crit.nested<-lmer(-1000/rt~c1+c2+c3+(1+c1+c2+c3|subj)+(1|item),subset(data.final,region=="crit" & rt>250)))

#subset(data.final,region=="crit" & rt<251)

#qqPlot(residuals(m.crit))
#qqPlot(residuals(m.crit.raw))
#qqPlot(residuals(m.crit.log))
#qqPlot(residuals(m.crit.nested))

## subj 47 has extraordinarily long max rt:
with(subset(data.final,region=="post.crit"),tapply(rt,IND=list(subj,pronoun),max))

(m.post.crit0<-lmer(-1000/rt~pron+rc+inter+(1|subj)+(1|item),subset(data.final,region=="post.crit")))

(m.post.crit<-lmer(-1000/rt~pron+rc+inter+(1+pron+rc+inter|subj)+(1|item),subset(data.final,region=="post.crit" & rt>250)))

(m.post.crit.max<-lmer(-1000/rt~pron+rc+inter+(1+pron+rc+inter|subj)+(1+pron+rc+inter|item),subset(data.final,region=="post.crit" & rt>250)))


(m.post.crit.log<-lmer(log(rt)~pron+rc+inter+(1|subj)+(1|item),subset(data.final,region=="post.crit"& rt>250)))

(m.post.crit.raw<-lmer(rt~pron+rc+inter+(1|subj)+(1|item),subset(data.final,region=="post.crit"& rt>250 & rt<79000)))

qqPlot(residuals(m.post.crit.raw),ylab="residuals using raw rts")

## 0.7%
#(2610-2592)/2610

(m.post.crit.nested<-lmer(-1000/rt~c1+c2+c3+(1+c1+c2+c3|subj)+(1|item),subset(data.final,region=="post.crit" & rt>250)))

(m.post.crit.nested.raw<-lmer(rt~c1+c2+c3+(1+c1+c2+c3|subj)+(1|item),subset(data.final,region=="post.crit" & rt>250)))


#qqPlot(residuals(m.post.crit.nested))

#qqPlot(residuals(m.post.crit.nested.raw))


library(reshape2)
library(reshape)

data.rs <- melt(subset(data.final,rt<79000), 
           id=c("pronoun","rel.cl","region","subj"), measure=c("rt"),na.rm=TRUE)

## raw rts computed by commenting the next line out:
data.rs$value<- -1000/data.rs$value

## remove ultra-fast rts:      
#data.rs<-subset(data.rs,value>250)      
                                                                  
data.id  <- data.frame(cast(data.rs, subj + region + pronoun + rel.cl ~ ., 
function(x) c(rt=mean(x), N=length(x) ) ))                     
                                                                                
(GM <- mean(tapply(data.id$rt, data.id$subj, mean)))

data.id <- ddply(data.id, .(subj), transform, rt.w = rt - mean(rt) + GM)  

temp<-melt(data.id, id.var=c("subj","pronoun","rel.cl","region"), measure.var="rt.w")

(M.id.w <- cast(temp, rel.cl+pronoun+region  ~ ., 
                function(x) c(M=mean(x), SE=sd(x)/sqrt(length(x)), N=length(x) ) ) ) 

library(xtable)
xtable(subset(M.id.w,region=="crit"))

xtable(subset(M.id.w,region=="post.crit"))



pd <- position_dodge(width = 0.05)
k<-1

p1<-ggplot(subset(M.id.w,region=="crit"), aes(x=pronoun, y=M,
group=rel.cl)) + 
    geom_point(position=pd,shape=21,fill="white",size=k*3) +
    geom_line(position=pd,aes(linetype=rel.cl),size=k) +
    geom_errorbar(aes(ymin=M-SE, ymax=M+SE),
                  width=.1,position=pd,size=k)+
    xlab("pronoun")+
    ylab("negative inverse reading time (-1000/rt ms^-1)")+
    scale_colour_hue(name="relative clause status", # Legend label, use darker colors
                     breaks=c("no.rel.cl", "rel.cl"),
                     labels=c("-RC", "+RC"),
                     l=40)+                     
    opts(title="Critical region") +
#    scale_y_continuous(limits=c(600,900)) +              
    theme_bw() +
    opts(legend.position=c(.87, .6)) # Position legend inside
                                    # This must go after theme_bw


p1a<-ggplot(subset(M.id.w,region=="crit"), aes(x=pronoun, y=M,
group=rel.cl)) + 
    geom_point(shape=21,fill="white",size=k*3) +
    geom_line(aes(linetype=rel.cl),size=k) +
    geom_errorbar(aes(ymin=M-2*SE, ymax=M+2*SE),
                  width=.1,size=k)+
    xlab("pronoun")+
    ylab("negative reciprocal reading time")+
    scale_colour_hue(name="relative clause status", # Legend label, use darker colors
                     breaks=c("no.rel.cl", "rel.cl"),
                     labels=c("-RC", "+RC"),
                     l=40)+                     
    opts(title="Critical region") +
#    scale_y_continuous(limits=c(600,900)) +              
    theme_bw() +
    opts(legend.position=c(.87, .6)) # Position legend inside
                                    # This must go after theme_bw

p2<-ggplot(subset(M.id.w,region=="post.crit"), aes(x=pronoun, y=M,
group=rel.cl)) + 
    geom_point(position=pd,shape=21,fill="white",size=k*3) +
    geom_line(position=pd,aes(linetype=rel.cl),size=k) +
    geom_errorbar(aes(ymin=M-SE, ymax=M+SE),
                  width=.1,position=pd,size=k)+
    xlab("pronoun")+
    ylab("reading time (ms)")+
    scale_colour_hue(name="relative clause status", # Legend label, use darker colors
                     breaks=c("no.rel.cl", "rel.cl"),
                     labels=c("No RC", "RC"),
                     l=40)+                     
    opts(title="Reading times at the post-critical region") +
#    scale_y_continuous(limits=c(600,900)) +              
    theme_bw() +
    opts(legend.position=c(.87, .6)) # Position legend inside
                                    # This must go after theme_bw

p2a<-ggplot(subset(M.id.w,region=="post.crit"), aes(x=pronoun, y=M,
group=rel.cl)) + 
    geom_point(shape=21,fill="white",size=k*3) +
    geom_line(aes(linetype=rel.cl),size=k) +
    geom_errorbar(aes(ymin=M-2*SE, ymax=M+2*SE),
                  width=.1,size=k)+
    xlab("pronoun")+
    ylab("negative reciprocal reading time")+
    scale_colour_hue(name="relative clause status", # Legend label, use darker colors
                     breaks=c("no.rel.cl", "rel.cl"),
                     labels=c("-RC", "+RC"),
                     l=40)+                     
    opts(title="Post-critical region") +
#    scale_y_continuous(limits=c(600,900)) +              
    theme_bw() +
    opts(legend.position=c(.87, .6)) # Position legend inside
                                    # This must go after theme_bw


#library(gridExtra)

#grid.arrange(p1a,p2a,nrow=2)


###################################################
### code chunk number 3: KeshtiariVasishthDataAnalysis.Rnw:402-405
###################################################
head(data.final)
m.crit0
m.post.crit0


###################################################
### code chunk number 4: KeshtiariVasishthDataAnalysis.Rnw:415-425
###################################################
m1<-m.crit0
## estimated sd of varying intercept:
(sigma.u<-attr(VarCorr(m1)$subj,"stddev"))
(sigma.w<-attr(VarCorr(m1)$item,"stddev"))

## estimated residual sd:
(sigma.e<-attr(VarCorr(m1),"sc"))

## fixed effects:
(beta<-fixef(m1))


###################################################
### code chunk number 5: KeshtiariVasishthDataAnalysis.Rnw:428-444
###################################################
crit.data<-subset(data.final,region=="crit" & rt>250)
crit.data$region<-factor(crit.data$region)

crit.data$rrt<- -1000/crit.data$rt


crit.dat <- list( subj = sort(as.integer( factor(crit.data$subj) )),
                      item = sort(as.integer( factor(crit.data$item) )),
                   rrt = crit.data$rrt,
                    pron = crit.data$pron,
                    rc = crit.data$rc,
                    inter = crit.data$inter,
                    N = nrow(crit.data),
                    I = length( unique(crit.data$subj) ),
                    K = length( unique(crit.data$item) )  
                      )


###################################################
### code chunk number 6: KeshtiariVasishthDataAnalysis.Rnw:449-465
###################################################
crit.ini <- list( list( sigma.e = sigma.e/3,
                         sigma.u = sigma.u/3,
                         sigma.w = sigma.w/3,    
                            beta = beta   /3 ),
                   list( sigma.e = sigma.e*3,
                         sigma.u = sigma.u*3,
                         sigma.w = sigma.w*3,
                            beta = beta   *3 ),
                   list( sigma.e = sigma.e/3,
                         sigma.u = sigma.u*3,
                         sigma.w = sigma.w*3,
                            beta = beta   /3 ),
                   list( sigma.e = sigma.e*3,
                         sigma.u = sigma.u/3,
                         sigma.w = sigma.w/3,
                            beta = beta   *3 ) )


###################################################
### code chunk number 7: critjagsmodelcrossedrandomintercepts
###################################################
cat("
# Fixing data to be used in model definition
model
   {
   # The model for each observational unit 
  #   (each row is a subject's data point)
     for( j in 1:N )
     {
     mu[j] <- beta[1] + beta[2] * ( pron[j] )  + beta[3] * ( rc[j] )  + beta[4] * ( inter[j] )  +  u[subj[j]] + w[item[j]]
     rrt[j] ~ dnorm( mu[j], tau.e )
     }

   # Random effects for each person
     for( i in 1:I )
     {
     u[i] ~ dnorm(0,tau.u)
     }

     # Random effects for each item
     for( k in 1:K )
     {
     w[k] ~ dnorm(0,tau.w) 
     }
  
   # Uninformative priors:
     
   # Fixed effect intercept and slope
     beta[1] ~ dnorm(0.0,1.0E-5)
     beta[2] ~ dnorm(0.0,1.0E-5)
     beta[3] ~ dnorm(0.0,1.0E-5)
     beta[4] ~ dnorm(0.0,1.0E-5)
    

   # Residual (within-person) variance
     tau.e <- pow(sigma.e,-2)
     sigma.e  ~ dunif(0,100)

   # Between-person variation
     tau.u <- pow(sigma.u,-2)
     sigma.u  ~ dunif(0,10)   

     # Between-item variation
     tau.w <- pow(sigma.w,-2)
     sigma.w  ~ dunif(0,10)     
   }",
     file="critcrossedrandom.jag" )

track.variables<-c("beta","sigma.e","sigma.u","sigma.w")

library(rjags)
system.time(
crit.mod <- jags.model( file = "critcrossedrandom.jag",
                         data = crit.dat,
                     n.chains = 4,
                        inits = crit.ini,
                      n.adapt =2000 ))

system.time(
crit.res <- coda.samples(crit.mod,
                          var = track.variables,
                          n.iter = 10000,
                          thin = 20))

summary( crit.res )

str(crit.res)


###################################################
### code chunk number 8: KeshtiariVasishthDataAnalysis.Rnw:539-563
###################################################
post<-jags.samples(crit.mod,
                   var=track.variables,
                   n.iter=10000)

## pronoun:
## chain 1:
counts<-table(post$beta[2,,][,1]<0)
100*counts[2]/(sum(counts))

## checks out: P(theta<0):
pnorm(0,mean=-0.0892,sd=0.0517)

## rc:
#hist(post$beta[3,,][,4])
median(post$beta[3,,][,4])
counts<-table(post$beta[3,,][,4]<0)
100*counts[2]/(sum(counts))

## interaction:
hist(post$beta[4,,][,4])
median(post$beta[4,,][,4])
counts<-table(post$beta[4,,][,4]<0)
100*counts[2]/(sum(counts))



###################################################
### code chunk number 9: KeshtiariVasishthDataAnalysis.Rnw:566-568
###################################################
par( mfrow=c(3,3) )
plot(crit.res)


###################################################
### code chunk number 10: KeshtiariVasishthDataAnalysis.Rnw:575-584
###################################################
m1<-m.post.crit0
## estimated sd of varying intercept:
(sigma.u<-attr(VarCorr(m1)$subj,"stddev"))
(sigma.w<-attr(VarCorr(m1)$item,"stddev"))

## estimated residual sd:
(sigma.e<-attr(VarCorr(m1),"sc"))

(beta<-fixef(m1))


###################################################
### code chunk number 11: KeshtiariVasishthDataAnalysis.Rnw:589-605
###################################################
crit.data<-subset(data.final,region=="post.crit" & rt>250)
crit.data$region<-factor(crit.data$region)

crit.data$rrt<- -1000/crit.data$rt


crit.dat <- list( subj = sort(as.integer( factor(crit.data$subj) )),
                      item = sort(as.integer( factor(crit.data$item) )),
                   rrt = crit.data$rrt,
                    pron = crit.data$pron,
                    rc = crit.data$rc,
                    inter = crit.data$inter,
                    N = nrow(crit.data),
                    I = length( unique(crit.data$subj) ),
                    K = length( unique(crit.data$item) )  
                      )


###################################################
### code chunk number 12: KeshtiariVasishthDataAnalysis.Rnw:610-626
###################################################
crit.ini <- list( list( sigma.e = sigma.e/3,
                         sigma.u = sigma.u/3,
                         sigma.w = sigma.w/3,    
                            beta = beta   /3 ),
                   list( sigma.e = sigma.e*3,
                         sigma.u = sigma.u*3,
                         sigma.w = sigma.w*3,
                            beta = beta   *3 ),
                   list( sigma.e = sigma.e/3,
                         sigma.u = sigma.u*3,
                         sigma.w = sigma.w*3,
                            beta = beta   /3 ),
                   list( sigma.e = sigma.e*3,
                         sigma.u = sigma.u/3,
                         sigma.w = sigma.w/3,
                            beta = beta   *3 ) )


###################################################
### code chunk number 13: postcritjagsmodelcrossedrandomintercepts
###################################################
cat("
# Fixing data to be used in model definition
model
   {
   # The model for each observational unit 
  #   (each row is a subject's data point)
     for( j in 1:N )
     {
     mu[j] <- beta[1] + beta[2] * ( pron[j] )  + beta[3] * ( rc[j] )  + beta[4] * ( inter[j] )  +  u[subj[j]] + w[item[j]]
     rrt[j] ~ dnorm( mu[j], tau.e )
     }

   # Random effects for each person
     for( i in 1:I )
     {
     u[i] ~ dnorm(0,tau.u)
     }

     # Random effects for each item
     for( k in 1:K )
     {
     w[k] ~ dnorm(0,tau.w) 
     }
  
   # Uninformative priors:
     
   # Fixed effect intercept and slope
     beta[1] ~ dnorm(0.0,1.0E-5)
     beta[2] ~ dnorm(0.0,1.0E-5)
     beta[3] ~ dnorm(0.0,1.0E-5)
     beta[4] ~ dnorm(0.0,1.0E-5)
    

   # Residual (within-person) variance
     tau.e <- pow(sigma.e,-2)
     sigma.e  ~ dunif(0,100)

   # Between-person variation
     tau.u <- pow(sigma.u,-2)
     sigma.u  ~ dunif(0,10)   

     # Between-item variation
     tau.w <- pow(sigma.w,-2)
     sigma.w  ~ dunif(0,10)     
   }",
     file="postcritcrossedrandom.jag" )

track.variables<-c("beta","sigma.e","sigma.u","sigma.w")

library(rjags)
system.time(
crit.mod <- jags.model( file = "postcritcrossedrandom.jag",
                         data = crit.dat,
                     n.chains = 4,
                        inits = crit.ini,
                      n.adapt =2000 ))

system.time(
crit.res <- coda.samples(crit.mod,
                          var = track.variables,
                          n.iter = 10000,
                          thin = 20))

summary( crit.res )


###################################################
### code chunk number 14: KeshtiariVasishthDataAnalysis.Rnw:700-722
###################################################
post<-jags.samples(crit.mod,
                   var=track.variables,
                   n.iter=10000)

## pronoun:
## chain 1:
#hist(post$beta[2,,][,1])
counts<-table(post$beta[2,,][,1]<0)
100*counts[2]/(sum(counts))

## rc:
#hist(post$beta[3,,][,1])
#median(post$beta[3,,][,  ])
counts<-table(post$beta[3,,][,1]<0)
100*counts[2]/(sum(counts))

## interaction:
#hist(post$beta[4,,][,4])
#median(post$beta[4,,][,4])
counts<-table(post$beta[4,,][,1]<0)
100*counts[2]/(sum(counts))



###################################################
### code chunk number 15: KeshtiariVasishthDataAnalysis.Rnw:726-728
###################################################
par( mfrow=c(3,3) )
plot(crit.res)


