diff --git a/LBA_densities.R b/LBA_densities.R
index 5396428..4804569 100644
--- a/LBA_densities.R
+++ b/LBA_densities.R
@@ -1,5 +1,6 @@
+
+#PDF of the LBA model
 lba_pdf = function(t, b, A, v, s){
-  #PDF of the LBA model
   b_A_tv_ts = (b - A - t*v)/(t*s)
   b_tv_ts = (b - t*v)/(t*s)
@@ -12,8 +13,8 @@ lba_pdf = function(t, b, A, v, s){
   return(pdf)
 }
 
+#CDF of the LBA model
 lba_cdf = function(t, b, A, v, s){
-  #CDF of the LBA model
   b_A_tv = b - A - t*v;
   b_tv = b - t*v;
@@ -28,45 +29,150 @@ lba_cdf = function(t, b, A, v, s){
 }
 
+#Function to calculate the negative summed log-likelihood for the LBA
 lba_log_likelihood = function(parms,data){
+
+  #print the parameters being considered
   print(parms)
+  #unpack parameters
   mean_drift_rate = parms[1:2]
-  sd_drift_rate = c(1,1) #fixed to 1 for scaling purposes
-  threshold = parms[3:4]
-  max_start_point = parms[5]
-  non_decision_time = parms[6]
+  sd_drift_rate = 1 #fixed to 1 for scaling purposes
+  threshold = parms[3]
+  max_start_point = parms[4]
+  non_decision_time = parms[5]
 
+  #calculate the raw threshold
   raw_threshold = max_start_point + threshold
 
+  #initialise a variable to store the likelihoods
   likelihood = rep(NA,nrow(data))
 
+  #loop through the data, calculating the likelihood for one observation at a time
   for (i in 1:nrow(data)){
+
+    #calculate the decision time by subtracting the non_decision_time parameter from the observed RT
     decision_time = data[i,2] - non_decision_time
-    #only consider responses where predicted decision time is greater than 0
+
+    #only consider responses where the predicted decision time is greater than 0. If it is less than 0,
+    #we assign a very low likelihood (see below)
     if(decision_time > 0){
-      cdf = 1;
-      for(j in 1:length(mean_drift_rate)){
+
+      #evaluate the pdf of the observed response, and the product of 1-cdf for all other responses
+      cdf = 1;
+      for(j in 1:length(mean_drift_rate)){
         if(data[i,1] == j){
-          pdf = lba_pdf(decision_time, raw_threshold[j], max_start_point, mean_drift_rate[j], sd_drift_rate[j])
+          pdf = lba_pdf(decision_time, raw_threshold, max_start_point, mean_drift_rate[j], sd_drift_rate)
         }else{
-          cdf = (1-lba_cdf(decision_time, raw_threshold[j], max_start_point, mean_drift_rate[j], sd_drift_rate[j])) * cdf;
+          cdf = (1-lba_cdf(decision_time, raw_threshold, max_start_point, mean_drift_rate[j], sd_drift_rate)) * cdf;
         }
       }
+      #calculate the probability of all accumulators having a negative drift rate (in this case the RT is undefined)
       prob_neg = 1;
       for(j in 1:length(mean_drift_rate)){
-        prob_neg = pnorm(-mean_drift_rate[j]/sd_drift_rate[j]) * prob_neg;
+        prob_neg = pnorm(-mean_drift_rate[j]/sd_drift_rate) * prob_neg;
       }
+
+      #calculate the likelihood of the observed response and response time by multiplying the pdf of the observed
+      #response with the product of 1-cdf of all the other responses
       likelihood[i] = pdf*cdf;
+
+      #normalise the likelihood by the probability that at least one accumulator has a positive drift rate
       likelihood[i] = likelihood[i]/(1-prob_neg);
-      if(likelihood[i] < 1e-10){
-        likelihood[i] = 1e-10;
+
+      #set a lower bound on the likelihood to avoid underflow issues
+      likelihood[i] = max(likelihood[i],1e-10)
+
+    }else{
+      #if the decision time is less than 0, the likelihood is set to a very low value
+      likelihood[i] = 1e-10;
+    }
+  }
+
+  #calculate the negative summed log-likelihood by summing the log of each likelihood and multiplying the result by -1
+  out = -sum(log(likelihood));
+
+  return(out);
+}
+
+#The function below is identical to the one above, but also contains code to plot the predictions of the model
+#under the parameters being considered
+
+lba_log_likelihood_w_plot = function(parms,data){
+  print(parms)
+  #unpack parameters
+  mean_drift_rate = parms[1:2]
+  sd_drift_rate = 1 #fixed to 1 for scaling purposes
+  threshold = parms[3]
+  max_start_point = parms[4]
+  non_decision_time = parms[5]
+
+  raw_threshold = max_start_point + threshold
+
+  likelihood = rep(NA,nrow(data))
+
+  for (i in 1:nrow(data)){
+    decision_time = data[i,2] - non_decision_time
+    #only consider responses where the predicted decision time is greater than 0
+    if(decision_time > 0){
+      cdf = 1;
+      for(j in 1:length(mean_drift_rate)){
+        if(data[i,1] == j){
+          pdf = lba_pdf(decision_time, raw_threshold, max_start_point, mean_drift_rate[j], sd_drift_rate)
+        }else{
+          cdf = (1-lba_cdf(decision_time, raw_threshold, max_start_point, mean_drift_rate[j], sd_drift_rate)) * cdf;
+        }
+      }
+      prob_neg = 1;
+      for(j in 1:length(mean_drift_rate)){
+        prob_neg = pnorm(-mean_drift_rate[j]/sd_drift_rate) * prob_neg;
+      }
+      likelihood[i] = pdf*cdf;
+      likelihood[i] = likelihood[i]/(1-prob_neg);
+      likelihood[i] = max(likelihood[i],1e-10)
     }else{
       likelihood[i] = 1e-10;
     }
   }
   out = -sum(log(likelihood));
+
+  #quick way to randomly generate observations: oversample drift rates, then keep only
+  #trials on which at least one accumulator has a positive drift rate
+  vs = mean_drift_rate
+  s = sd_drift_rate
+  A = max_start_point
+  n = nrow(data)
+  b = threshold + max_start_point
+  t0 = non_decision_time
+  st0 = 0
+
+  n.with.extras=ceiling(n*(1+3*prod(pnorm(-vs))))
+  drifts=matrix(rnorm(mean=vs,sd=s,n=n.with.extras*length(vs)),ncol=length(vs),byrow=TRUE)
+
+  repeat {
+    drifts=rbind(drifts,matrix(rnorm(mean=vs,sd=s,n=n.with.extras*length(vs)),ncol=length(vs),byrow=TRUE))
+    tmp=apply(drifts,1,function(x) any(x>0))
+    drifts=drifts[tmp,]
+    if (nrow(drifts)>=n) break
+  }
+
+  drifts=drifts[1:n,]
+  drifts[drifts<0]=0
+  starts=matrix(runif(min=0,max=A,n=n*length(vs)),ncol=length(vs),byrow=TRUE)
+  ttf=t((b-t(starts)))/drifts
+  RT=apply(ttf,1,min)+t0+runif(min=-st0/2,max=+st0/2,n=n)
+  resp=apply(ttf,1,which.min)
+  predicted = data.frame(resp=resp,RT=RT)
+
+  data$source = "observed"
+  predicted$source = "predicted"
+
+  #plot the observed against the predicted RT distributions for each response
+  plot = bind_rows(data,predicted) %>%
+    filter(RT<5) %>%
+    ggplot() +
+    geom_density(aes(x=RT,group=source,colour=source),alpha=0.1) +
+    facet_grid(.~resp)
+
+  print(plot)
   return(out);
 }
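Aside (illustrative, not part of the patch): a quick numerical sanity check on the densities above. The defective density of response j at decision time t is pdf_j(t) multiplied by the product of 1-cdf over the other accumulators; integrated over t and summed over responses, it should equal 1 - prod(pnorm(-v/s)), i.e. come to roughly 1 after the same normalisation used in lba_log_likelihood. The parameter values below are arbitrary assumptions, and the snippet assumes LBA_densities.R is available to source.

    source("LBA_densities.R")

    #arbitrary example parameter values (assumptions, not values from the patch)
    mean_drift_rate = c(0.7, 1.3)
    sd_drift_rate   = 1
    threshold       = 1.2
    max_start_point = 0.6
    raw_threshold   = threshold + max_start_point

    #defective density of response j at decision time t:
    #pdf of accumulator j times the product of 1-cdf over the other accumulators
    defective_density = function(t, j){
      sapply(t, function(t_i){
        pdf = lba_pdf(t_i, raw_threshold, max_start_point, mean_drift_rate[j], sd_drift_rate)
        cdf = 1
        for (k in seq_along(mean_drift_rate)[-j]){
          cdf = cdf * (1 - lba_cdf(t_i, raw_threshold, max_start_point, mean_drift_rate[k], sd_drift_rate))
        }
        pdf * cdf
      })
    }

    #integrate over decision time and sum over responses; after normalising by the
    #probability that at least one drift rate is positive, this should be close to 1
    p = sum(sapply(seq_along(mean_drift_rate), function(j) integrate(defective_density, 0, Inf, j = j)$value))
    prob_neg = prod(pnorm(-mean_drift_rate / sd_drift_rate))
    p / (1 - prob_neg)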
diff --git a/lba-sim-data-v1.R b/lba-sim-data-v1.R
index 23e6626..1a527e6 100644
--- a/lba-sim-data-v1.R
+++ b/lba-sim-data-v1.R
@@ -4,7 +4,7 @@ rm(list = ls())
 # code structure:
 # 1. install packages (if necessary), load packages
-# 2. define diffusion model functions - one for single accumulator model, one for two independent accumulators
+# 2. define functions that simulate the model
 # 3. set intital parameters and run
 # 4. plot output RTs as histograms
 #--------------------------------------------------------------------------
@@ -17,54 +17,58 @@ library(tidyverse)
 # DEFINE FUNCTIONS
 #--------------------------------------------------------------------------
 
-generate_rts <- function(max_start_point, mean_drift_rates, sd_drift_rates, thresholds, non_decision_time, nTrials){
-  # use Euler's method to generate sample paths of the diffusion process
-  # each trial of the simulation is constructed as a loop that will generate simulated
-  # sample paths (i.e. accumulations of evidence). This loop is then applied nTrials times
+generate_rts <- function(max_start_point, mean_drift_rates, sd_drift_rates, threshold, non_decision_time, nTrials){
+  # generates choices and response times for N trials
 
-  get.trials <- function(max_start_point, mean_drift_rates, sd_drift_rates, thresholds, non_decision_time){
-    # the following loop controls the evidence accumulation process. At each time_step,
-    # the current evidence total (T_evidene) is compared againt the value of the two absorbing
-    # boundaries (boundary and 0). Sample steps occur until the evidence total crosses
-    # one of the boundaries. Each sample of info changes the evidence total by
-    # an amount that is fixed across time steps (drift_rate * time_steps),
-    # plus a scaled random draw from a normal distribution with mean 0 and sd noise_sdev
-    # (a value of 0.1 or 1 are often used). Once
-    # evidence total crosses a boundary, the time steps and the boundary crossed are
-    # recorded, the non-decision time is then added to the time steps to make the predicted RT
-    # initialise evidence
+  get.trials <- function(max_start_point, mean_drift_rates, sd_drift_rates, threshold, non_decision_time){
+    #this function simulates the evidence accumulation process according to the LBA
 
-    #get number of responses
+    #first, get the number of response alternatives, which can be two or more
     n_responses = length(mean_drift_rates)
 
-    #sample starting point
+    #sample the starting point from a uniform distribution bounded between 0 and the max starting point
     start_point = runif(n=1,min=0,max=max_start_point)
 
-    #sample drift rates
-    #Need to ensure at least one drift rate is positive, because otherwise the response time is infinite
+    #sample drift rates for each accumulator from a normal distribution.
+    #we need to ensure at least one drift rate is positive, because otherwise the response time is infinite,
+    #so we repeat the sampling process until at least one drift rate is positive
     repeat{
+      #sample drift rates
       drift_rates = rnorm(n=n_responses,mean=mean_drift_rates,sd=sd_drift_rates)
-
+      #check to see if at least one is positive
       if(any(drift_rates > 0) ){
        break
      }
    }
 
-    #calculate evidence required to reach threshold
-    evidence_required = thresholds - start_point
+    #calculate the raw threshold. The threshold parameter is typically expressed as the raw threshold minus
+    #the maximum starting point, so we recover the actual threshold (i.e., the distance
+    #between 0 evidence and the threshold) by summing the threshold and the max starting point
+    raw_threshold = threshold + max_start_point
 
-    #calculate time required for each acculator to reach threshold
+    #calculate the evidence required to reach the threshold. This is just the difference between the
+    #raw_threshold and the randomly sampled starting point
+    evidence_required = raw_threshold - start_point
+
+    #calculate the time required for each accumulator to reach the threshold by dividing the evidence required
+    #(i.e. the distance) by the rate of evidence accumulation
     time_required = evidence_required / drift_rates
 
+    #identify the minimum time required. This value becomes the decision time, which represents the
+    #time taken for the first accumulator to hit the threshold. Here we disregard values that are negative,
+    #because these are only generated when the drift rate is negative. In this situation, the evidence
+    #never breaches the threshold
     decision_time = min(time_required[time_required > 0])
 
+    #compute the response time by summing the decision time and the non-decision time
     RT = decision_time + non_decision_time
 
+    #identify the response that breaches the threshold
     resp = which(time_required == decision_time)
 
-    out = data.frame(resp = resp, RT = RT) # assign the variables to a list
+    #assign the response and RT variables to a data frame
+    out = data.frame(resp = resp, RT = RT)
     return(out)
   }
 
@@ -72,19 +76,20 @@ generate_rts <- function(max_start_point, mean_drift_rates, sd_drift_rates, thre
   # initialise a vector to collect responses
   max_start_point = rep(max_start_point, times = nTrials)
   tmp = lapply(max_start_point, get.trials, mean_drift_rates = mean_drift_rates,
-               sd_drift_rates = sd_drift_rates, thresholds = threshold, non_decision_time = non_decision_time) # apply get.trials function nTrials times
+               sd_drift_rates = sd_drift_rates, threshold = threshold, non_decision_time = non_decision_time) # apply get.trials function nTrials times
   data = do.call(rbind, tmp) # make into a nice data frame
   return(data)
 }
 
-# GENERATE DATA FROM SINGLE DRIFT MODEL
+
+# GENERATE DATA FROM THE LBA
 #--------------------------------------------------------------------------
-# first, set the values for the core parameters in the diffusion model (with single drift implementation):
-mean_drift_rates = c(0.7,1.3) # drift rate
-sd_drift_rates = c(1,1) # standard deviation of drift rate
-threshold = c(1.3,2)
-max_start_point = 0.5
-non_decision_time = 0.2 # non-decision time
+# first, set the values for the core parameters of the LBA
+mean_drift_rates = c(0.7,1.3) # drift rate for each accumulator (here we assume there are two responses)
+sd_drift_rates = 1 # standard deviation of drift rates
+threshold = 1.2 # response threshold
+max_start_point = 0.6 # maximum starting point
+non_decision_time = 0.2 # non-decision time
 
 # set the number of trials
 nTrials = 10000
 
@@ -93,16 +98,18 @@ nTrials = 10000
 # ptm <- proc.time()
 predicted.data = generate_rts(mean_drift_rates = mean_drift_rates,
                               sd_drift_rates = sd_drift_rates,
-                              thresholds = thresholds,
+                              threshold = threshold,
                               max_start_point = max_start_point,
                               non_decision_time = non_decision_time,
                               nTrials = nTrials)
+
 # Stop the clock
 # proc.time() - ptm
 
 # _____________________________________________________________________________________________________
-# load the previously made distribution
+# load the previously generated data
 load("observed_data_lba.RData")
 
-# PLOT OUTPUT DATAs AS HISTOGRAMS
+
+# PLOT OUTPUT DATA AS DENSITIES
 plot.observed_vs_predicted_RTs <- function(observed, predicted){
 
   observed$source = "observed"
@@ -120,22 +127,20 @@ plot.observed_vs_predicted_RTs(observed.data, predicted.data)
 
 # IMPLEMENT MAXIMUM LIKELIHOOD PARAMETER ESTIMATION
 #-------------------------------------------------------------------
-#source functions needed to calculate chi-squared
+#source functions needed to calculate the likelihood
 source("LBA_densities.R")
 
+#set initial parameter values
 initial_parms = c(mean_drift_rate = c(1,1),
-                  threshold = c(1,1),
-                  max_start_point = 0.5,
+                  threshold = 1,
+                  max_start_point = 0.1,
                   non_decision_time = 0.3)
 
-lba_log_likelihood(initial_parms,predicted.data)
-
+#optimise to find the parameter values that are most likely given the data
 optim(par = initial_parms,
       fn = lba_log_likelihood,
       data = observed.data,
-      lower = c(-10,-10, 0, 0, 0.001, 0.05),
-      upper = c(10,10, 10, 10, 10, 1),
+      lower = c(0,0, 0, 0.001, 0.05),
+      upper = c(5,5, 5, 5, 1),
       control = list(trace=6))
-
-parms=c(10.00, -10.00 , 0.00 , 10.00 , 0.00 , 0.05)
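Aside (illustrative, not part of the patch): with box constraints, optim() fits via L-BFGS-B, which can settle into a local minimum of the negative log-likelihood. A common safeguard is to restart the fit from several random starting points inside the bounds and keep the best solution. The sketch below assumes observed.data and LBA_densities.R have been loaded as in the script above; the number of restarts is an arbitrary choice.

    #bounds copied from the optim() call above
    lower = c(0, 0, 0, 0.001, 0.05)
    upper = c(5, 5, 5, 5, 1)

    #refit from several random starting points drawn inside the bounds
    fits = lapply(1:5, function(i){
      start = runif(length(lower), min = lower, max = upper)
      optim(par = start,
            fn = lba_log_likelihood,
            data = observed.data,
            method = "L-BFGS-B",
            lower = lower,
            upper = upper)
    })

    #keep the solution with the smallest negative summed log-likelihood
    best = fits[[which.min(sapply(fits, function(f) f$value))]]
    best$par    # estimated parameters
    best$value  # negative summed log-likelihood at the optimum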
diff --git a/observed_data_lba.RData b/observed_data_lba.RData
index 3c473bc..125386a 100644
Binary files a/observed_data_lba.RData and b/observed_data_lba.RData differ
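Aside (illustrative, not part of the patch): the change to observed_data_lba.RData is binary, so the diff cannot show its contents, and the true generating parameter values are not recoverable from this patch. If the observed data ever needed to be regenerated, a file of the same form could be produced with generate_rts from lba-sim-data-v1.R; the parameter values below are assumptions for illustration only, not the values used to build the committed file.

    #assumed "true" parameter values, for illustration only
    observed.data = generate_rts(mean_drift_rates = c(0.7, 1.3),
                                 sd_drift_rates = 1,
                                 threshold = 1.2,
                                 max_start_point = 0.6,
                                 non_decision_time = 0.2,
                                 nTrials = 10000)

    #save under the object name the script expects to load()
    save(observed.data, file = "observed_data_lba.RData")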