-
Notifications
You must be signed in to change notification settings - Fork 0
/
simulate.research.world.R
128 lines (115 loc) · 4.96 KB
/
simulate.research.world.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# simulate.research.world.R
# simulate the research world
# replicate http://rsos.royalsocietypublishing.org/content/3/9/160384
# Oct 2017
library(reshape2)
library(doBy)
library(plyr)
library(ggplot2) # used by false positive
source('false.positive.R')
source('perform.communicate.mistake.R')
source('initialise.labs.R')
source('initialise.labs.random.R')
source('birth.and.death.R')
source('taxinspect.R')
source('tackle.R')
## set up all parameters
b = 0.1 # base rate probability that novel hypothesis are true
e_0 = 75 # usually 75
r_0 = 0
W_0 = 0.8
initial_random = F
r_i = 0
eta = 0.2
VN = 1
VRpos = 0.5
VRneg = 0.5
mu_e = 0.01
mu_r = 0
mu_W = 0
birth.death = 10
sigma_e = 1
sigma_r = 0.01
sigma_W = 0.01
tax.audit = FALSE # TRUE for most
increase_e_audited = 1 # 5 for most
increase_e_networked = 0.5 # 5 for most
FP.threshold = 0.67 # 0.67 for most
n.papers.min = 50 # 50 for most
n.papers.per.auditor = 10
auditor.salary = 105 # in USD $1000
audit = 200 # audit frequency (in time); 70 baseline
audit.error = 0 # 0 for most, range [0,1); -99 = random kill
mistake.baseline = 0.25 # probability of mistake when there is a false positive
# parameters for running multiple simulations:
fint = 'res' # initial part of filename
run.number = 1 # set to 1 here, but usually set to a range of numbers for multiple simulation runs
# set up times
max.time = 800000 # maximum time examined
n.results = 100 # number of results to keep
collect = max.time / n.results # frequency of data collection for plotting
# 1a) initialise labs
if(initial_random == F){labs = initialise.labs(r_0=r_0, e_0=e_0, W_0=W_0)} # Figure 3/4
if(initial_random == T){labs = initialise.labs.random(r_0=r_0, e_0=e_0, W_0=W_0)} #
# 1b) initialise hypothesis as true or false
truth <<- rbinom(n=100*max.time, size=1, prob=b) # novel hypothesis are true (global variable)
h <<- 0 # index for hypothesis (global variable)
cost <<-0 # cost of audits (global variable)
naudit <<-0 # number of papers audited
removed <<- NULL # to store data on removed labs
total.FP <<-0 # total number of false positives
total.mistake <<-0 # total number of mistakes
## loop through time
#all.past = NULL # need to store all past results for new hypothesis for confirming replications
for (t in 1:max.time){
# 2) Run a cycle of research
init = F
if (t < 5) {init = T} # no replication in early times (allow hypotheses to build up)
labs = perform.communicate(frame=labs, initial = init, r_i=r_i, eta=eta, VN=VN, VRneg=VRneg, VRpos=VRpos, mistake.baseline=mistake.baseline)
labs$t = t
# calculate totals over 100 labs for later costs
Mistakes = sum(labs$mistake, na.rm=T)
FPs = sum(labs$FP, na.rm=T)
total.FP <<- total.FP + FPs
total.mistake <<- total.mistake + Mistakes
# 3) birth and death (and mutation)
labs = birth.and.death(labs, mu_e=mu_e, mu_r=mu_r, mu_W=mu_W, sigma_e=sigma_e, sigma_W=sigma_W, sigma_r=sigma_r, birth.death=birth.death, birth.only=F)
# 4) audit (only start after 100 research cycles)
if(tax.audit == T & (t > 100) & (t %% audit == 0)){
labs = taxinspect(labs, mu_e=mu_e, mu_r=mu_r, mu_W=mu_W,
increase_e_audited = increase_e_audited, increase_e_networked = increase_e_networked, FP.threshold = FP.threshold, n.papers.min = n.papers.min,
n.papers.per.auditor = n.papers.per.auditor, auditor.salary = auditor.salary, sim=run.number,
audit.error = audit.error)
}
# keep occasional record of labs over time (also collect time 1)
if( t==1 | (t %% collect == 0) ){
cat('t=', t,'\n', sep='')
fnum = t/collect # file number
if(t==1){fnum = 0}
filename = paste(fint, run.number, '.', fnum ,'.RData', sep='')
save(labs, file=filename)
}
} # end of time loop
# calculate total papers
total.papers = max(labs$h, na.rm=T)
# save all parameters
sim.parms = data.frame(b=b, e_0=e_0, r_0=r_0, W_0=W_0, initial_random=initial_random,
max.time=max.time, r_i=r_i, eta=eta,
VN=VN, VRneg=VRneg, VRpos=VRpos,
mu_e=mu_e, mu_r=mu_r, mu_W=mu_W,
sigma_e=sigma_e, sigma_r=sigma_r, sigma_W=sigma_W, birth.death=birth.death,
tax.audit=tax.audit, increase_e_audited = increase_e_audited, increase_e_networked = increase_e_networked,
FP.threshold = FP.threshold, n.papers.min = n.papers.min,
n.papers.per.auditor = n.papers.per.auditor, auditor.salary = auditor.salary,
audit.error = audit.error, # added Oct 2017
cost=cost, audit=audit, naudit=naudit, total.papers=total.papers,
mistake.baseline = mistake.baseline) # added Oct 2017
# save meta data
outfile = paste('meta.data.', run.number, '.RData', sep='')
save(sim.parms, file=outfile)
# save data on removed labs
#outfile = paste('removed.', run.number, '.RData', sep='')
#save(removed, file=outfile)
# save data on total number of false positives and mistakes
outfile = paste('totals.', run.number, '.RData', sep='')
save(t, total.FP, total.mistake, total.papers, file=outfile)