
Commit

Finalised code for simulating and fitting the LBA
ballardtj committed Apr 16, 2019
1 parent 09a4cc4 commit ad784f9
Showing 3 changed files with 171 additions and 60 deletions.
134 changes: 120 additions & 14 deletions LBA_densities.R
@@ -1,5 +1,6 @@

#PDF of the LBA model
lba_pdf = function(t, b, A, v, s){
#PDF of the LBA model

b_A_tv_ts = (b - A - t*v)/(t*s)
b_tv_ts = (b - t*v)/(t*s)
@@ -12,8 +13,8 @@ lba_pdf = function(t, b, A, v, s){
return(pdf)
}
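# For reference: the quantity this function computes is the standard
# single-accumulator LBA density of Brown & Heathcote (2008), which in
# R notation reads
#   f(t) = (1/A) * ( -v * pnorm((b-A-t*v)/(t*s)) + s * dnorm((b-A-t*v)/(t*s))
#                  + v * pnorm((b-t*v)/(t*s))   - s * dnorm((b-t*v)/(t*s)) )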

#CDF of the LBA model
lba_cdf = function(t, b, A, v, s){
#CDF of the LBA model

b_A_tv = b - A - t*v;
b_tv = b - t*v;
@@ -28,45 +29,150 @@ lba_cdf = function(t, b, A, v, s){

}
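# A quick consistency check (illustrative parameter values; assumes lba_pdf and
# lba_cdf above are sourced): the numerical integral of the pdf from 0 to t
# should match the cdf at t, which catches transcription errors.
# integrate(function(t) sapply(t, lba_pdf, b = 1.8, A = 0.6, v = 1.3, s = 1),
#           lower = 1e-6, upper = 0.8)$value
# lba_cdf(0.8, b = 1.8, A = 0.6, v = 1.3, s = 1)  # should be (nearly) equal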

#Function to calculate the negative summed log-likelihood for LBA
lba_log_likelihood = function(parms,data){

#print parameters being considered
print(parms)

#unpack parameters
mean_drift_rate = parms[1:2]
sd_drift_rate = c(1,1) #fixed to 1 for scaling purposes
threshold = parms[3:4]
max_start_point = parms[5]
non_decision_time = parms[6]
sd_drift_rate = 1 #fixed to 1 for scaling purposes
threshold = parms[3]
max_start_point = parms[4]
non_decision_time = parms[5]

#calculate raw threshold
raw_threshold = max_start_point + threshold

#initialise variable to store likelihoods
likelihood = rep(NA,nrow(data))

#loop through data, calculating likelihood for each observation at a time
for (i in 1:nrow(data)){

#calculate decision time by subtracting non_decision_time parameter from observed RT
decision_time = data[i,2] - non_decision_time
#only consider responses where predicted decision time is greater than 0

#only consider responses where predicted decision time is greater than 0. If less than 0,
#we assign a very low likelihood (see below)
if(decision_time > 0){
cdf = 1;
for(j in 1:length(mean_drift_rate)){

#evaluate the pdf of the observed response, and the product of 1-cdf for all other responses.
cdf = 1;
for(j in 1:length(mean_drift_rate)){
if(data[i,1] == j){
pdf = lba_pdf(decision_time, raw_threshold[j] , max_start_point, mean_drift_rate[j], sd_drift_rate[j])
pdf = lba_pdf(decision_time, raw_threshold , max_start_point, mean_drift_rate[j], sd_drift_rate)
}else{
cdf = (1-lba_cdf(decision_time, raw_threshold[j], max_start_point, mean_drift_rate[j], sd_drift_rate[j])) * cdf;
cdf = (1-lba_cdf(decision_time, raw_threshold , max_start_point, mean_drift_rate[j], sd_drift_rate)) * cdf;
}
}
#calculate the probability of all responses having a negative drift rate (in this case the RT is undefined)
prob_neg = 1;
for(j in 1:length(mean_drift_rate)){
prob_neg = pnorm(-mean_drift_rate[j]/ sd_drift_rate[j] ) * prob_neg;
prob_neg = pnorm(-mean_drift_rate[j]/ sd_drift_rate ) * prob_neg;
}

#calculate the likelihood of the observed response and response time by multiplying the pdf of the observed
#response with the product of 1-cdf of all the other responses.
likelihood[i] = pdf*cdf;

#normalise the likelihood by the probability that at least one accumulator has a positive drift rate
likelihood[i] = likelihood[i]/(1-prob_neg);
if(likelihood[i] < 1e-10){
likelihood[i] = 1e-10;

#set lower bound on the likelihood to avoid underflow issues
likelihood[i] = max(likelihood[i],1e-10)

}else{
#if decision time is less than 0, the likelihood is set to a very low value.
likelihood[i] = 1e-10;
}
}

#calculate the negative summed log likelihood by summing the log of each likelihood and multiplying the result by -1
out = -sum(log(likelihood));

return(out);
}
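# Minimal usage sketch (illustrative values; assumes data has the response in
# column 1 and the RT in seconds in column 2, matching the indexing in the loop above):
# toy_data = data.frame(resp = c(1, 2, 1), RT = c(0.62, 0.55, 0.71))
# lba_log_likelihood(parms = c(0.7, 1.3, 1.2, 0.6, 0.2), data = toy_data)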

#The function below is identical to the one above but contains code to plot the predictions of the model under
#the parameters being considered

lba_log_likelihood_w_plot = function(parms,data){
print(parms)
#unpack parameters
mean_drift_rate = parms[1:2]
sd_drift_rate = 1 #fixed to 1 for scaling purposes
threshold = parms[3]
max_start_point = parms[4]
non_decision_time = parms[5]

raw_threshold = max_start_point + threshold

likelihood = rep(NA,nrow(data))

for (i in 1:nrow(data)){
decision_time = data[i,2] - non_decision_time
#only consider responses where predicted decision time is greater than 0
if(decision_time > 0){
cdf = 1;
for(j in 1:length(mean_drift_rate)){
if(data[i,1] == j){
pdf = lba_pdf(decision_time, raw_threshold , max_start_point, mean_drift_rate[j], sd_drift_rate)
}else{
cdf = (1-lba_cdf(decision_time, raw_threshold , max_start_point, mean_drift_rate[j], sd_drift_rate)) * cdf;
}
}
prob_neg = 1;
for(j in 1:length(mean_drift_rate)){
prob_neg = pnorm(-mean_drift_rate[j]/ sd_drift_rate ) * prob_neg;
}
likelihood[i] = pdf*cdf;
likelihood[i] = likelihood[i]/(1-prob_neg);
likelihood[i] = max(likelihood[i],1e-10)

}else{
likelihood[i] = 1e-10;
}
}
out = -sum(log(likelihood));

#quick way to randomly generate observations
vs = mean_drift_rate
s = sd_drift_rate
A = max_start_point
n = nrow(data)
b = threshold + max_start_point
t0 = non_decision_time
st0 = 0

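# prod(pnorm(-vs)) is the probability that every accumulator draws a negative
# drift rate (so the trial has no finite RT); we oversample up front and
# discard those all-negative draws in the loop below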
n.with.extras=ceiling(n*(1+3*prod(pnorm(-vs))))
drifts=matrix(rnorm(mean=vs,sd=s,n=n.with.extras*length(vs)),ncol=length(vs),byrow=TRUE)

repeat {
drifts=rbind(drifts,matrix(rnorm(mean=vs,sd=s,n=n.with.extras*length(vs)),ncol=length(vs),byrow=TRUE))
tmp=apply(drifts,1,function(x) any(x>0))
drifts=drifts[tmp,]
if (nrow(drifts)>=n) break
}

drifts=drifts[1:n,]
drifts[drifts<0]=0
starts=matrix(runif(min=0,max=A,n=n*length(vs)),ncol=length(vs),byrow=TRUE)
ttf=t((b-t(starts)))/drifts
RT=apply(ttf,1,min)+t0+runif(min=-st0/2,max=+st0/2,n=n)
resp=apply(ttf,1,which.min)
predicted = data.frame(resp=resp,RT=RT)

data$source = "observed"
predicted$source = "predicted"

plot = bind_rows(data,predicted) %>%
filter(RT<5) %>%
ggplot() +
geom_density(aes(x=RT,group=source,colour=source),alpha=0.1) +
facet_grid(.~resp)

print(plot)
return(out);
}
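# Note: the plotting branch above uses bind_rows, filter, %>% and ggplot from
# dplyr/ggplot2 (attached via tidyverse in lba-sim-data-v1.R); if this file is
# sourced on its own, run library(tidyverse) first. Illustrative call, assuming
# a data frame with resp and RT columns:
# lba_log_likelihood_w_plot(parms = c(0.7, 1.3, 1.2, 0.6, 0.2), data = observed.data)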
97 changes: 51 additions & 46 deletions lba-sim-data-v1.R
@@ -4,7 +4,7 @@
rm(list = ls())
# code structure:
# 1. install packages (if necessary), load packages
# 2. define diffusion model functions - one for single accumulator model, one for two independent accumulators
# 2. define functions that simulate the model
# 3. set initial parameters and run
# 4. plot output RTs as histograms
#--------------------------------------------------------------------------
@@ -17,74 +17,79 @@ library(tidyverse)

# DEFINE FUNCTIONS
#--------------------------------------------------------------------------
generate_rts <- function(max_start_point, mean_drift_rates, sd_drift_rates, thresholds, non_decision_time, nTrials){
# use Euler's method to generate sample paths of the diffusion process
# each trial of the simulation is constructed as a loop that will generate simulated
# sample paths (i.e. accumulations of evidence). This loop is then applied nTrials times
generate_rts <- function(max_start_point, mean_drift_rates, sd_drift_rates, threshold, non_decision_time, nTrials){
# generates choices and response times for N trials


get.trials <- function(max_start_point, mean_drift_rates, sd_drift_rates, thresholds, non_decision_time){
# the following loop controls the evidence accumulation process. At each time_step,
# the current evidence total (T_evidence) is compared against the value of the two absorbing
# boundaries (boundary and 0). Sample steps occur until the evidence total crosses
# one of the boundaries. Each sample of info changes the evidence total by
# an amount that is fixed across time steps (drift_rate * time_steps),
# plus a scaled random draw from a normal distribution with mean 0 and sd noise_sdev
# (a value of 0.1 or 1 is often used). Once the
# evidence total crosses a boundary, the time steps and the boundary crossed are
# recorded; the non-decision time is then added to the time steps to make the predicted RT
# initialise evidence
get.trials <- function(max_start_point, mean_drift_rates, sd_drift_rates, threshold, non_decision_time){
#this function simulates the evidence accumulation process according to the LBA.

#get number of responses
#first, get number of response alternatives, which can be two or more.
n_responses = length(mean_drift_rates)

#sample starting point
#sample starting point from uniform distribution bounded between 0 and the max starting point
start_point = runif(n=1,min=0,max=max_start_point)

#sample drift rates
#Need to ensure at least one drift rate is positive, because otherwise the response time is infinite
#sample drift rates for each accumulator from normal distribution.
#need to ensure at least one drift rate is positive, because otherwise the response time is infinite
#so we repeat the sampling process until at least one drift rate is positive.
repeat{
#sample drift rates
drift_rates = rnorm(n=n_responses,mean=mean_drift_rates,sd=sd_drift_rates)

#check to see if at least one is positive
if(any(drift_rates > 0) ){
break
}

}

#calculate evidence required to reach threshold
evidence_required = thresholds - start_point
#calculate raw threshold. Threshold is typically expressed as the raw threshold minus
#the maximum starting point. So we need to calculate the actual threshold (i.e., the distance
#between 0 evidence and the threshold) by summing the threshold and max starting point.
raw_threshold = threshold + max_start_point

#calculate time required for each accumulator to reach threshold
#calculate evidence required to reach threshold. This is just the difference between the raw_threshold
#and the randomly sampled starting point
evidence_required = raw_threshold - start_point

#calculate time required for each accumulator to reach threshold by dividing the evidence required
#(i.e. the distance) by the rate of evidence accumulation
time_required = evidence_required / drift_rates

#identify the minimum time required. This value becomes the decision time, which represents the
#time taken for the first accumulator to hit the threshold. Here we disregard values that are negative,
#because these are only generated when the drift rate is negative. In this situation, the evidence
#never breaches the threshold
decision_time = min(time_required[time_required > 0])

#compute the response time by summing the decision time and the non decision time.
RT = decision_time + non_decision_time

#identify the response that breaches threshold
resp = which(time_required == decision_time)

out = data.frame(resp = resp, RT = RT) # assign the variables to a list
#assign the response and RT variables to a data frame
out = data.frame(resp = resp, RT = RT)

return(out)
}

# replicate max_start_point once per trial so get.trials can be applied via lapply
max_start_point = rep(max_start_point, times = nTrials)
tmp = lapply(max_start_point, get.trials, mean_drift_rates = mean_drift_rates,
sd_drift_rates = sd_drift_rates, thresholds = threshold, non_decision_time = non_decision_time) # apply get.trials function nTrials times
sd_drift_rates = sd_drift_rates, threshold = threshold, non_decision_time = non_decision_time) # apply get.trials function nTrials times
data = do.call(rbind, tmp) # make into a nice data frame
return(data)
}
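# Worked example of the get.trials arithmetic (illustrative numbers): with
# threshold = 1.2, max_start_point = 0.6, non_decision_time = 0.2, and sampled
# start_point = 0.3, drift_rates = c(0.5, 2.0):
#   raw_threshold     = 1.2 + 0.6       = 1.8
#   evidence_required = 1.8 - 0.3       = 1.5
#   time_required     = 1.5 / c(0.5, 2) = c(3.00, 0.75)
#   decision_time = 0.75 (accumulator 2 wins), so RT = 0.75 + 0.2 = 0.95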

# GENERATE DATA FROM SINGLE DRIFT MODEL

# GENERATE DATA FROM THE LBA
#--------------------------------------------------------------------------
# first, set the values for the core parameters in the diffusion model (with single drift implementation):
mean_drift_rates = c(0.7,1.3) # drift rate
sd_drift_rates = c(1,1) # standard deviation of drift rate
threshold = c(1.3,2)
max_start_point = 0.5
non_decision_time = 0.2 # non-decision time
# first, set the values for the core parameters for the LBA
mean_drift_rates = c(0.7,1.3) # drift rate for each accumulator (here we assume there are two responses)
sd_drift_rates = 1 # standard deviation of drift rates
threshold = 1.2 # response threshold
max_start_point = 0.6 # maximum starting point
non_decision_time = 0.2 # non-decision time

# set the number of trials
nTrials = 10000
@@ -93,16 +98,18 @@ nTrials = 10000
# ptm <- proc.time()
predicted.data = generate_rts(mean_drift_rates = mean_drift_rates,
sd_drift_rates = sd_drift_rates,
thresholds = thresholds,
threshold = threshold,
max_start_point = max_start_point,
non_decision_time = non_decision_time,
nTrials = nTrials)

# Stop the clock
# proc.time() - ptm
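# quick look at the simulated data (illustrative):
# head(predicted.data)                  # columns: resp (1 or 2), RT in seconds
# table(predicted.data$resp) / nTrials  # choice proportions by accumulator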
# _____________________________________________________________________________________________________
# load the previously made distribution
# load the previously generated data
load("observed_data_lba.RData")
# PLOT OUTPUT DATA AS HISTOGRAMS

# PLOT OUTPUT DATA AS DENSITIES
plot.observed_vs_predicted_RTs <- function(observed, predicted){

observed$source = "observed"
@@ -120,22 +127,20 @@ plot.observed_vs_predicted_RTs(observed.data, predicted.data)

# IMPLEMENT MAXIMUM LIKELIHOOD PARAMETER ESTIMATION
#-------------------------------------------------------------------
#source functions needed to calculate chi-squared
#source functions needed to calculate the likelihood
source("LBA_densities.R")

#set initial parameter values
initial_parms = c(mean_drift_rate = c(1,1),
threshold = c(1,1),
max_start_point = 0.5,
threshold = 1,
max_start_point = 0.1,
non_decision_time = 0.3)

lba_log_likelihood(initial_parms,predicted.data)

#optimise to find the parameter values that are most likely given the data
optim(par = initial_parms,
fn = lba_log_likelihood,
data = observed.data,
lower = c(-10,-10, 0, 0, 0.001, 0.05),
upper = c(10,10, 10, 10, 10, 1),
lower = c(0,0, 0, 0.001, 0.05),
upper = c(5,5, 5, 5, 1),
control = list(trace=6))
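# Note: when lower/upper bounds are supplied without a method, optim() switches
# to "L-BFGS-B" with a warning; spelling the method out is cleaner. An
# illustrative variant of the call above:
# fit = optim(par = initial_parms,
#             fn = lba_log_likelihood,
#             data = observed.data,
#             method = "L-BFGS-B",   # required for box constraints
#             lower = c(0, 0, 0, 0.001, 0.05),
#             upper = c(5, 5, 5, 5, 1),
#             control = list(trace = 6))
# fit$par  # maximum-likelihood estimates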


parms=c(10.00, -10.00 , 0.00 , 10.00 , 0.00 , 0.05)
Binary file modified observed_data_lba.RData
