forked from csaluski/interpretable_school_policy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmatching_districts.rmd
513 lines (352 loc) · 23.9 KB
/
matching_districts.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
We will match on four covariates: the percentage of students receiving free or reduced-price lunch (`FRL`), enrollment (`Enroll`), the percentage of non-white students (`nonwhitepercent`), and the percentage of students passing exams in the baseline year (`passratebase`).
```{r}
library(MatchIt)
library(data.table)
library(dplyr)
library(stringr)
library(purrr)
library(openxlsx)
# ---- District-level inputs --------------------------------------------------
# Load the district MAP results and build the join key used throughout this
# file: "MO-" followed by the county/district code left-padded to 6 digits.
district.map.dt <- fread("Data Sources CSV/building_map_data/District MAP content area and grade all disag.csv")
district.map.dt[, State.District.ID := paste0("MO-", str_pad(COUNTY_DISTRICT, 6, "left", "0"))]
# This introduces NA for any grade outside 03 to 08. This is appropriate for
# this analysis, as we are not concerned with grades outside this range,
# however this should be considered for other forms of analysis.
district.map.dt[, GRADE_LEVEL := as.numeric(GRADE_LEVEL)]
# Percentages that cannot be parsed as numbers become NA and are then treated
# as 0, so the pass rate below is always defined.
district.map.dt[, PROFICIENT_PCT := as.numeric(PROFICIENT_PCT)]
district.map.dt[is.na(PROFICIENT_PCT), PROFICIENT_PCT := 0]
district.map.dt[, ADVANCED_PCT := as.numeric(ADVANCED_PCT)]
district.map.dt[is.na(ADVANCED_PCT), ADVANCED_PCT := 0]
# Pass rate = proficient + advanced
district.map.dt[, passratebase := PROFICIENT_PCT + ADVANCED_PCT]
# Districts that have a 2021 grade-3 row for the "IEP Non MAPA" group; only
# these can be followed up on the 2021 outcome.
iep.reported.2021.districts <- unique(
  district.map.dt[GRADE_LEVEL == 3 & TYPE == "IEP Non MAPA" & YEAR == 2021]$State.District.ID
)

district.demographic.dt <- data.table(read.xlsx("Data Sources/Comparison_Group/District Demographic Data.xlsx"))
district.enrollment.dt <- data.table(read.xlsx("Data Sources/Comparison_Group/District Enrollment.xlsx"))
district.demographic.dt[, State.District.ID := paste0("MO-", str_pad(COUNTY_DISTRICT_CODE, 6, "left", "0"))]
district.enrollment.dt[, State.District.ID := paste0("MO-", str_pad(COUNTY_DISTRICT_CODE, 6, "left", "0"))]

# Inner joins on year + district. `nomatch = NULL` must be an argument of `[`,
# not an element of the `on` vector: inside c(...) it is silently dropped and
# the join falls back to the default outer join (nomatch = NA).
district.keys <- c("YEAR", "State.District.ID")
joined.dt <- district.map.dt[district.demographic.dt, on = district.keys, nomatch = NULL]
joined.dt <- joined.dt[district.enrollment.dt, on = district.keys, nomatch = NULL]
# match within a year against other schools
joined.dt <- joined.dt[YEAR %in% 2018:2020]
```
```{r}
# ---- DCI participation by district ------------------------------------------
# One row per (district, school year) in which the district had an active DCI
# building, ordered by year so the running count below follows time.
dci.building.dt <- fread("Data Sources/DCI Data/Active Districts/Active_DCI_buildings_2017_2022.csv")
dci.districts.dt <- unique(dci.building.dt[, .(State.District.ID, currentSchoolYear)])
dci.districts.dt <- dci.districts.dt[order(currentSchoolYear)]
all.dci.districts <- unique(dci.districts.dt$State.District.ID)
# Account for years taken off from the program by counting along the years the
# district was actually in the program
# (.N is the group size; seq_len() avoids the 1:0 trap of 1:nrow(.SD) and does
# not need to materialise .SD.)
dci.districts.dt[, year.in.program := seq_len(.N), by = "State.District.ID"]

# Attach program participation to every district/year row of the joined data;
# rows for non-participating district/years keep NA in year.in.program.
dci.joined.dt <- dci.districts.dt[joined.dt, on = c("State.District.ID", "currentSchoolYear" = "YEAR"), nomatch = NA]
# First year of participation for each district
selected.year <- unique(dci.districts.dt[year.in.program == 1, .(currentSchoolYear, State.District.ID)])
dci.districts.2021 <- dci.districts.dt[currentSchoolYear == 2021]

# Matching covariates. Values that are missing or not parseable as numbers are
# set to 0.
# NOTE(review): a missing ENROLLMENT_WHITE_PCT therefore becomes
# nonwhitepercent = 100, and a missing FRL becomes 0 - confirm this imputation
# is intended.
dci.joined.dt[, Enroll := as.numeric(ENROLLMENT_GRADES_03)]
dci.joined.dt[is.na(Enroll), Enroll := 0]
dci.joined.dt[, FRL := as.numeric(LUNCH_COUNT_FREE_REDUCED_PCT)]
dci.joined.dt[is.na(FRL), FRL := 0]
dci.joined.dt[, ENROLLMENT_WHITE_PCT := as.numeric(ENROLLMENT_WHITE_PCT)]
dci.joined.dt[is.na(ENROLLMENT_WHITE_PCT), ENROLLMENT_WHITE_PCT := 0]
dci.joined.dt[, nonwhitepercent := 100 - ENROLLMENT_WHITE_PCT]
dci.joined.dt[, PROFICIENT_PCT := as.numeric(PROFICIENT_PCT)]
dci.joined.dt[is.na(PROFICIENT_PCT), PROFICIENT_PCT := 0]

# Treated districts: first DCI year is the cohort year AND still active in 2021
started.2019 <- selected.year[currentSchoolYear == 2019 & State.District.ID %in% dci.districts.2021$State.District.ID]
started.2020 <- selected.year[currentSchoolYear == 2020 & State.District.ID %in% dci.districts.2021$State.District.ID]
# Every other DCI district; these are excluded from the control pool of the
# corresponding cohort.
other.dci.districts.2019 <- selected.year[
  !State.District.ID %in% started.2019$State.District.ID
  & State.District.ID %in% all.dci.districts
]
other.dci.districts.2020 <- selected.year[
  !State.District.ID %in% started.2020$State.District.ID
  & State.District.ID %in% all.dci.districts
]
# Schools that started in 2019 are 1 year
# These are schools that started in fall 2018, and then reported in this data
# as the year 2019
print(table(unique(dci.joined.dt[currentSchoolYear == 2019, .(year.in.program, State.District.ID)])$year.in.program))
# Schools that started in 2020 are 1 year
# These are schools that started in fall 2019, and then reported in this data
# as the year 2020
print(table(unique(dci.joined.dt[currentSchoolYear == 2020, .(year.in.program, State.District.ID)])$year.in.program))
```
```{r}
# Restrict to grade-3 English Language Arts rows of districts that also report
# the IEP group in 2021 (so the outcome can be observed).
dci.joined.dt <- dci.joined.dt[GRADE_LEVEL == 3 & CONTENT_AREA == "Eng. Language Arts" & State.District.ID %in% iep.reported.2021.districts]
# Reportable count relative to grade-3 enrollment.
dci.joined.dt[, prop.iep := REPORTABLE / Enroll]
# Missing enrollment was set to 0 upstream, so the ratio can be Inf (x / 0),
# NaN (0 / 0) or NA. is.finite() drops all three; !is.infinite() would keep
# NaN/NA rows, which matchit() rejects as covariate values.
dci.joined.dt <- dci.joined.dt[is.finite(prop.iep)]

# cohort 2 started treatment in 2019, so 2018 is the year for the baseline
# Controls are districts outside DCI; other DCI districts are removed.
dci.cohort2.dt <- dci.joined.dt[currentSchoolYear == 2018 & !State.District.ID %in% other.dci.districts.2019$State.District.ID]
dci.cohort2.dt[, treated := State.District.ID %in% started.2019$State.District.ID]
dci.cohort2.iep.dt <- dci.cohort2.dt[TYPE == "IEP Non MAPA"]

# cohort 3 started treatment in 2020, so 2019 is year for baseline
dci.cohort3.dt <- dci.joined.dt[currentSchoolYear == 2019 & !State.District.ID %in% other.dci.districts.2020$State.District.ID]
dci.cohort3.dt[, treated := State.District.ID %in% started.2020$State.District.ID]
dci.cohort3.iep.dt <- dci.cohort3.dt[TYPE == "IEP Non MAPA"]
```
```{r}
# Three matching specifications per cohort, all using optimal matching on a
# GLM-estimated distance:
#   1. the four core covariates
#   2. core covariates + REPORTABLE
#   3. core covariates + prop.iep

# 1. core covariates
m.cohort2.matched.out <- matchit(
  treated ~ FRL + Enroll + nonwhitepercent + passratebase,
  data = dci.cohort2.iep.dt,
  method = "optimal",
  distance = "glm"
)
m.cohort3.matched.out <- matchit(
  treated ~ FRL + Enroll + nonwhitepercent + passratebase,
  data = dci.cohort3.iep.dt,
  method = "optimal",
  distance = "glm"
)

# 2. core covariates + REPORTABLE
m.cohort2.report.matched.out <- matchit(
  treated ~ FRL + Enroll + nonwhitepercent + passratebase + REPORTABLE,
  data = dci.cohort2.iep.dt,
  method = "optimal",
  distance = "glm"
)
m.cohort3.report.matched.out <- matchit(
  treated ~ FRL + Enroll + nonwhitepercent + passratebase + REPORTABLE,
  data = dci.cohort3.iep.dt,
  method = "optimal",
  distance = "glm"
)

# 3. core covariates + prop.iep
m.cohort2.iep.prop.matched.out <- matchit(
  treated ~ FRL + Enroll + nonwhitepercent + passratebase + prop.iep,
  data = dci.cohort2.iep.dt,
  method = "optimal",
  distance = "glm"
)
m.cohort3.iep.prop.matched.out <- matchit(
  treated ~ FRL + Enroll + nonwhitepercent + passratebase + prop.iep,
  data = dci.cohort3.iep.dt,
  method = "optimal",
  distance = "glm"
)

# Print the balance summary of each run, then keep its matched controls.
summary(m.cohort2.matched.out)
matched.cohort2.districts.dt <- match.data(m.cohort2.matched.out, group = "control")

summary(m.cohort3.matched.out)
matched.cohort3.districts.dt <- match.data(m.cohort3.matched.out, group = "control")

summary(m.cohort2.report.matched.out)
matched.cohort2.report.districts.dt <- match.data(m.cohort2.report.matched.out, group = "control")

summary(m.cohort3.report.matched.out)
matched.cohort3.report.districts.dt <- match.data(m.cohort3.report.matched.out, group = "control")

summary(m.cohort2.iep.prop.matched.out)
matched.cohort2.iep.prop.districts.dt <- match.data(m.cohort2.iep.prop.matched.out, group = "control")

summary(m.cohort3.iep.prop.matched.out)
matched.cohort3.iep.prop.districts.dt <- match.data(m.cohort3.iep.prop.matched.out, group = "control")

# Treated units are taken from the core-covariate runs only.
matched.cohort2.treated.dt <- match.data(m.cohort2.matched.out, group = "treated")
matched.cohort3.treated.dt <- match.data(m.cohort3.matched.out, group = "treated")
```
```{r}
# ---- Label each matched set --------------------------------------------------
# cohort: 2 or 3. extra.var: the covariate added on top of the core formula
# (NA for the core-covariate runs and for the treated units).
matched.cohort2.treated.dt[, `:=`(cohort = 2, extra.var = NA)]
matched.cohort3.treated.dt[, `:=`(cohort = 3, extra.var = NA)]
matched.cohort2.districts.dt[, `:=`(cohort = 2, extra.var = NA)]
matched.cohort3.districts.dt[, `:=`(cohort = 3, extra.var = NA)]
matched.cohort2.report.districts.dt[, `:=`(cohort = 2, extra.var = "reportable")]
matched.cohort3.report.districts.dt[, `:=`(cohort = 3, extra.var = "reportable")]
matched.cohort2.iep.prop.districts.dt[, `:=`(cohort = 2, extra.var = "iep.prop")]
matched.cohort3.iep.prop.districts.dt[, `:=`(cohort = 3, extra.var = "iep.prop")]

matched.cohorts.dt <- rbind(
  matched.cohort2.treated.dt, matched.cohort3.treated.dt,
  matched.cohort2.districts.dt, matched.cohort3.districts.dt,
  matched.cohort2.report.districts.dt, matched.cohort3.report.districts.dt,
  matched.cohort2.iep.prop.districts.dt, matched.cohort3.iep.prop.districts.dt
)

# ---- Attach the 2021 outcome -------------------------------------------------
map.2021.result <- district.map.dt[YEAR == 2021 & TYPE == "IEP Non MAPA" & GRADE_LEVEL == 3 & CONTENT_AREA == "Eng. Language Arts"]
map.2021.result <- map.2021.result[State.District.ID %in% matched.cohorts.dt$State.District.ID]
map.2021.result[, passrate2021 := passratebase]
map.2021.result <- map.2021.result[, .(State.District.ID, passrate2021)]
# Only districts that have a 2021 result row are kept.
matched.cohorts.dt <- matched.cohorts.dt[map.2021.result, on = c("State.District.ID")]
matched.cohorts.dt[, change := passrate2021 - passratebase]

# mean / sd / sum / count of every covariate and outcome, per treatment group,
# cohort and matching specification.
matched.summary.dt <- matched.cohorts.dt[, unlist(recursive = FALSE, lapply(
  list(mean = mean, sd = sd, sum = sum, count = length),
  function(f) lapply(.SD, f)
)), by = c("treated", "cohort", "extra.var"), .SDcols = c("FRL", "nonwhitepercent", "passratebase", "passrate2021", "change", "Enroll", "REPORTABLE", "prop.iep")]
write.csv(matched.summary.dt, file = "./Data Sources CSV/Matching/summary_different_methods.csv")
write.csv(matched.cohorts.dt, file = "./Data Sources CSV/Matching/matching_results.csv")

# ---- Matched pairs -----------------------------------------------------------
# match.data() labels every matched pair with a shared `subclass` value, so the
# pairs are recovered by joining treated and control rows on it. (Sorting each
# side by distance and cbind-ing them does not reproduce the pairs chosen by
# the matching, and breaks when the two sides differ in length.)
# Controls come from the core-covariate run (extra.var is NA), i.e. the same
# matchit object as the treated rows, so subclass labels are comparable.
# all = TRUE keeps a unit whose partner lost its 2021 result, with NA on the
# other side, instead of dropping it silently.
treated.c2.dt <- matched.cohorts.dt[
  cohort == 2 & treated == TRUE,
  .(subclass, Treated.District.ID = State.District.ID, Treated.distance = distance)
]
untreated.c2.dt <- matched.cohorts.dt[
  cohort == 2 & treated == FALSE & is.na(extra.var),
  .(subclass, Matched.District.ID = State.District.ID, Matched.distance = distance)
]
treated.c3.dt <- matched.cohorts.dt[
  cohort == 3 & treated == TRUE,
  .(subclass, Treated.District.ID = State.District.ID, Treated.distance = distance)
]
untreated.c3.dt <- matched.cohorts.dt[
  cohort == 3 & treated == FALSE & is.na(extra.var),
  .(subclass, Matched.District.ID = State.District.ID, Matched.distance = distance)
]
c2.matched.districts <- merge(treated.c2.dt, untreated.c2.dt, by = "subclass", all = TRUE)
c3.matched.districts <- merge(treated.c3.dt, untreated.c3.dt, by = "subclass", all = TRUE)
write.csv(c2.matched.districts, "./Data Sources CSV/Matching/cohort_2_matched.csv")
write.csv(c3.matched.districts, "./Data Sources CSV/Matching/cohort_3_matched.csv")

# ---- Treated vs. matched control: change in pass rate (Welch t-test) ---------
print(t.test(matched.cohorts.dt[treated == TRUE & cohort == 2]$change, matched.cohorts.dt[treated == FALSE & cohort == 2 & is.na(extra.var)]$change))
print(t.test(matched.cohorts.dt[treated == TRUE & cohort == 3]$change, matched.cohorts.dt[treated == FALSE & cohort == 3 & is.na(extra.var)]$change))

# ---- Check against districts known to be inactive ----------------------------
inactive.dci.districts <- c("MO-084001", "MO-106004", "MO-058112", "MO-011079", "MO-023101", "MO-004106", "MO-069107", "MO-063067", "MO-004110", "MO-054041", "MO-078003", "MO-078013", "MO-078013", "MO-033090", "MO-033090", "MO-068073", "MO-018050", "MO-088080")
# Inactive districts that ended up in the treated group of each cohort
print(inactive.dci.districts[inactive.dci.districts %in% treated.c2.dt$Treated.District.ID])
print(inactive.dci.districts[inactive.dci.districts %in% treated.c3.dt$Treated.District.ID])
# Positions of inactive districts among the cohort-3 matched controls.
# The pair table has no State.District.ID column (the lookup returned NULL and
# the result was always integer(0)).
# NOTE(review): assumed the control column was intended - confirm.
which(inactive.dci.districts %in% c3.matched.districts$Matched.District.ID)
```
Welch Two Sample t-test
data: matched.cohorts.dt[treated == T & cohort == 2]$change and matched.cohorts.dt[treated == F & cohort == 2 & is.na(extra.var)]$change
t = 0.1151, df = 57.017, p-value = 0.9088
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-11.13022 12.48780
sample estimates:
mean of x mean of y
-1.596970 -2.275758
Welch Two Sample t-test
data: matched.cohorts.dt[treated == T & cohort == 3]$change and matched.cohorts.dt[treated == F & cohort == 3 & is.na(extra.var)]$change
t = -1.8441, df = 141.24, p-value = 0.06726
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-15.7501038 0.5473641
sample estimates:
mean of x mean of y
-8.1506849 -0.5493151
Building-level matching: use the yearly building-level NCES data from Melvin and the yearly building-level MAP data from the `./Data Sources CSV/building_map_data/` files. The ESSA data is building-level as well.
Create 4 matched data sets: cohorts 2 and 3, each split into ESSA and non-ESSA buildings.
```{r}
# ---- Building-level MAP results ----------------------------------------------
# COUNTY_DISTRICT and SCHOOL_CODE uniquely identify a building
map.buildings.2018 <- fread("Data Sources CSV/building_map_data/Building MAP content area and grade all disag 2018.csv")
map.buildings.19.21 <- fread("Data Sources CSV/building_map_data/Building MAP content area and grade all disag 2019-2021.csv")
map.building.dt <- rbind(map.buildings.19.21, map.buildings.2018)
# Building key: "MO-<district>-<school code><district>", district code padded
# to 6 digits. The same construction is used for the NCES and ESSA tables below
# so the three sources join on it.
map.building.dt[, State.School.ID := paste0(
  "MO-", str_pad(COUNTY_DISTRICT, 6, "left", "0"),
  "-", SCHOOL_CODE, str_pad(COUNTY_DISTRICT, 6, "left", "0")
)]
# Unparseable / missing percentages count as 0; pass rate = proficient + advanced
map.building.dt[, PROFICIENT_PCT := as.numeric(PROFICIENT_PCT)]
map.building.dt[is.na(PROFICIENT_PCT), PROFICIENT_PCT := 0]
map.building.dt[, ADVANCED_PCT := as.numeric(ADVANCED_PCT)]
map.building.dt[is.na(ADVANCED_PCT), ADVANCED_PCT := 0]
map.building.dt[, passratebase := PROFICIENT_PCT + ADVANCED_PCT]
# Buildings with a 2021 grade-3 row for the IEP group (outcome observable).
# GRADE_LEVEL is compared as the string "03" here, unlike the district chunk.
iep.reported.2021.schools <- unique(
  map.building.dt[GRADE_LEVEL == "03" & TYPE == "IEP Non MAPA" & YEAR == 2021]$State.School.ID
)

# ---- NCES demographics + enrollment ------------------------------------------
nces.demographic.dt <- data.table(read.xlsx("./Data Sources/NCES Demographic/Building Demographic Data 2006 to Current.xlsx"))
nces.enrollment.dt <- data.table(read.xlsx("./Data Sources/NCES Demographic/Building Enrollment.xlsx"))
nces.building.dt <- nces.demographic.dt[
  nces.enrollment.dt,
  on = c("COUNTY_DISTRICT_CODE", "SCHOOL_CODE", "YEAR", "DISTRICT_NAME", "SCHOOL_NAME"),
  nomatch = NULL
]
nces.building.dt <- nces.building.dt[YEAR %in% 2018:2020]
nces.building.dt[, State.School.ID := paste0(
  "MO-", str_pad(COUNTY_DISTRICT_CODE, 6, "left", "0"),
  "-", SCHOOL_CODE, str_pad(COUNTY_DISTRICT_CODE, 6, "left", "0")
)]
joined.building.dt <- nces.building.dt[map.building.dt, on = c("YEAR", "State.School.ID"), nomatch = NULL]

# ---- DCI participation by building -------------------------------------------
dci.building.dt <- fread("Data Sources/DCI Data/Active Districts/Active_DCI_buildings_2017_2022.csv")
dci.building.dt <- unique(dci.building.dt[, .(State.School.ID, currentSchoolYear)])
dci.building.dt <- dci.building.dt[order(currentSchoolYear)]
all.dci.schools <- unique(dci.building.dt$State.School.ID)
# Running count of the years a building was actually in the program
# (.N is the group size; seq_len() avoids the 1:0 trap of 1:nrow(.SD)).
dci.building.dt[, year.in.program := seq_len(.N), by = "State.School.ID"]
dci.year.dt <- dci.building.dt[, .(year.in.program, currentSchoolYear, State.School.ID)]
building.selected.year <- unique(dci.building.dt[year.in.program == 1, .(currentSchoolYear, State.School.ID)])
dci.buildings.2021 <- dci.building.dt[currentSchoolYear == 2021]
# Attach participation to every building/year row; non-DCI rows keep NA.
dci.building.dt <- dci.building.dt[joined.building.dt, on = c("State.School.ID", "currentSchoolYear" = "YEAR"), nomatch = NA]

# ---- Matching covariates (missing / unparseable values become 0) -------------
# NOTE(review): a missing ENROLLMENT_WHITE_PCT becomes nonwhitepercent = 100
# and a missing FRL becomes 0 - confirm this imputation is intended.
dci.building.dt[, Enroll := as.numeric(ENROLLMENT_GRADES_03)]
dci.building.dt[is.na(Enroll), Enroll := 0]
dci.building.dt[, FRL := as.numeric(LUNCH_COUNT_FREE_REDUCED_PCT)]
dci.building.dt[is.na(FRL), FRL := 0]
dci.building.dt[, ENROLLMENT_WHITE_PCT := as.numeric(ENROLLMENT_WHITE_PCT)]
dci.building.dt[is.na(ENROLLMENT_WHITE_PCT), ENROLLMENT_WHITE_PCT := 0]
dci.building.dt[, nonwhitepercent := 100 - ENROLLMENT_WHITE_PCT]
dci.building.dt[, PROFICIENT_PCT := as.numeric(PROFICIENT_PCT)]
dci.building.dt[is.na(PROFICIENT_PCT), PROFICIENT_PCT := 0]

# Treated buildings: first DCI year is the cohort year AND still active in 2021
buildings.started.2019 <- building.selected.year[currentSchoolYear == 2019 & State.School.ID %in% dci.buildings.2021$State.School.ID]
buildings.started.2020 <- building.selected.year[currentSchoolYear == 2020 & State.School.ID %in% dci.buildings.2021$State.School.ID]
# All other DCI buildings, excluded from the control pool of that cohort
other.dci.buildings.2019 <- building.selected.year[
  !State.School.ID %in% buildings.started.2019$State.School.ID
  & State.School.ID %in% all.dci.schools
]
other.dci.buildings.2020 <- building.selected.year[
  !State.School.ID %in% buildings.started.2020$State.School.ID
  & State.School.ID %in% all.dci.schools
]

# ---- ESSA (Title 1) status ---------------------------------------------------
# When we verify ESSA data load it here, then join to dci.building.dt and
# add boolean column, ESSA in year of selection.
# This file is not part of our main existing data set, it was provided to us on
# request
essa.years.dt <- data.table(read.xlsx("./Data Sources/Building ESSA/Title 1 Schools 2018 - 2022.xlsx", sheet = "Title 1 Buildings", startRow = 5))
essa.years.dt[, State.School.ID := paste0(
  "MO-", str_pad(ACCTYDIS, 6, "left", "0"),
  "-", ACSCHOOL, str_pad(ACCTYDIS, 6, "left", "0")
)]
# One row per ESSA building/year, with the DCI program year where available;
# the year column of the result is named currentSchoolYear.
essa.year.in.program.dt <- dci.year.dt[
  essa.years.dt,
  on = c("currentSchoolYear" = "ACYEAR", "State.School.ID")
]
# Add the ESSA columns to the building table; after this join the year column
# is named ACYEAR.
dci.building.dt <- essa.years.dt[dci.building.dt, on = c("State.School.ID", "ACYEAR" = "currentSchoolYear")]
# Then we'll take the 4 subsets to match on, and include REPORTABLE in the
# formula now
```
```{r}
# Grade-3 English Language Arts rows of buildings that report the IEP group in
# 2021 and have non-zero grade-3 enrollment.
dci.building.filtered.dt <- dci.building.dt[
  GRADE_LEVEL == "03"
  & CONTENT_AREA == "Eng. Language Arts"
  & State.School.ID %in% iep.reported.2021.schools
  & Enroll != 0
]
# Reportable count relative to grade-3 enrollment; drop infinite ratios.
dci.building.filtered.dt[, prop.iep := REPORTABLE / Enroll]
dci.building.filtered.dt <- dci.building.filtered.dt[!is.infinite(prop.iep)]

# Buildings flagged ESSA in the year they started DCI (not the baseline year)
essa.schools.2019 <- essa.year.in.program.dt[currentSchoolYear == 2019]$State.School.ID
essa.schools.2020 <- essa.year.in.program.dt[currentSchoolYear == 2020]$State.School.ID

# --- Cohort 2: treatment starts in 2019, baseline year is 2018 ---
dci.building.cohort2.dt <- dci.building.filtered.dt[
  ACYEAR == 2018 & !State.School.ID %in% other.dci.buildings.2019$State.School.ID
]
dci.building.cohort2.dt[, treated := State.School.ID %in% buildings.started.2019$State.School.ID]
dci.building.cohort2.dt[, cohort := 2]
dci.building.cohort2.iep.dt <- dci.building.cohort2.dt[TYPE == "IEP Non MAPA"]
dci.cohort2.essa.dt <- dci.building.cohort2.iep.dt[State.School.ID %in% essa.schools.2019]
dci.cohort2.non.essa.dt <- dci.building.cohort2.iep.dt[!State.School.ID %in% essa.schools.2019]

# --- Cohort 3: treatment starts in 2020, baseline year is 2019 ---
dci.building.cohort3.dt <- dci.building.filtered.dt[
  ACYEAR == 2019 & !State.School.ID %in% other.dci.buildings.2020$State.School.ID
]
dci.building.cohort3.dt[, treated := State.School.ID %in% buildings.started.2020$State.School.ID]
dci.building.cohort3.dt[, cohort := 3]
dci.building.cohort3.iep.dt <- dci.building.cohort3.dt[TYPE == "IEP Non MAPA"]
dci.cohort3.essa.dt <- dci.building.cohort3.iep.dt[State.School.ID %in% essa.schools.2020]
dci.cohort3.non.essa.dt <- dci.building.cohort3.iep.dt[!State.School.ID %in% essa.schools.2020]
```
```{r}
# Optimal matching on a GLM-estimated distance, ESSA buildings only, using the
# four core covariates.
m.cohort2.essa.matched.out <- matchit(
  treated ~ FRL + Enroll + nonwhitepercent + passratebase,
  data = dci.cohort2.essa.dt,
  method = "optimal",
  distance = "glm"
)
m.cohort3.essa.matched.out <- matchit(
  treated ~ FRL + Enroll + nonwhitepercent + passratebase,
  data = dci.cohort3.essa.dt,
  method = "optimal",
  distance = "glm"
)

# Balance diagnostics for both cohorts
summary(m.cohort2.essa.matched.out)
summary(m.cohort3.essa.matched.out)

# Matched samples (treated and control rows together)
matched.cohort2.essa.dt <- match.data(m.cohort2.essa.matched.out)
matched.cohort3.essa.dt <- match.data(m.cohort3.essa.matched.out)
```
```{r}
# ---- Attach the 2021 outcome to the matched buildings ------------------------
matched.buildings.dt <- rbind(matched.cohort2.essa.dt, matched.cohort3.essa.dt)
map.building.2021.result <- map.building.dt[YEAR == 2021 & TYPE == "IEP Non MAPA" & GRADE_LEVEL == "03" & CONTENT_AREA == "Eng. Language Arts"]
map.building.2021.result <- map.building.2021.result[State.School.ID %in% matched.buildings.dt$State.School.ID]
map.building.2021.result[, passrate2021 := passratebase]
map.building.2021.result <- map.building.2021.result[, .(State.School.ID, passrate2021)]
# Only buildings that have a 2021 result row are kept.
matched.buildings.dt <- matched.buildings.dt[map.building.2021.result, on = c("State.School.ID")]
matched.buildings.dt[, change := passrate2021 - passratebase]

# mean / sd / sum / count of every covariate and outcome per group and cohort
matched.buildings.summary.dt <- matched.buildings.dt[, unlist(recursive = FALSE, lapply(
  list(mean = mean, sd = sd, sum = sum, count = length),
  function(f) lapply(.SD, f)
)), by = c("treated", "cohort"), .SDcols = c("FRL", "nonwhitepercent", "passratebase", "passrate2021", "change", "Enroll", "REPORTABLE", "prop.iep")]

# ---- Matched pairs -----------------------------------------------------------
# match.data() gives both members of a matched pair the same `subclass` value.
# Pairs are recovered by joining treated and control rows on it, within a
# cohort (each cohort comes from its own matchit run). Sorting each side by
# distance and cbind-ing them does not reproduce the matched pairs.
treated.c2.dt <- matched.buildings.dt[
  cohort == 2 & treated == TRUE,
  .(subclass, Treated.School.ID = State.School.ID, Treated.distance = distance, Treated.change = change)
]
untreated.c2.dt <- matched.buildings.dt[
  cohort == 2 & treated == FALSE,
  .(subclass, Matched.School.ID = State.School.ID, Matched.distance = distance, Matched.change = change)
]
treated.c3.dt <- matched.buildings.dt[
  cohort == 3 & treated == TRUE,
  .(subclass, Treated.School.ID = State.School.ID, Treated.distance = distance, Treated.change = change)
]
untreated.c3.dt <- matched.buildings.dt[
  cohort == 3 & treated == FALSE,
  .(subclass, Matched.School.ID = State.School.ID, Matched.distance = distance, Matched.change = change)
]
# all = TRUE keeps a building whose partner has no 2021 result (NA on the
# other side) instead of dropping it silently.
c2.matched.buildings <- merge(treated.c2.dt, untreated.c2.dt, by = "subclass", all = TRUE)
c3.matched.buildings <- merge(treated.c3.dt, untreated.c3.dt, by = "subclass", all = TRUE)

# ---- Treated vs. matched control: change in pass rate ------------------------
# Welch (unpaired) tests on the two groups
print(t.test(matched.buildings.dt[treated == TRUE & cohort == 2]$change, matched.buildings.dt[treated == FALSE & cohort == 2]$change))
print(t.test(matched.buildings.dt[treated == TRUE & cohort == 3]$change, matched.buildings.dt[treated == FALSE & cohort == 3]$change))
# Paired tests: element i of both vectors must belong to the same matched
# pair, so they are taken from the subclass-aligned pair tables, restricted to
# complete pairs. Passing the treated/control columns in table order would pair
# unrelated buildings.
complete.c2.pairs <- c2.matched.buildings[!is.na(Treated.change) & !is.na(Matched.change)]
complete.c3.pairs <- c3.matched.buildings[!is.na(Treated.change) & !is.na(Matched.change)]
print(t.test(complete.c2.pairs$Treated.change, complete.c2.pairs$Matched.change, paired = TRUE))
print(t.test(complete.c3.pairs$Treated.change, complete.c3.pairs$Matched.change, paired = TRUE))

# Output the resulting data tables, and matched pairs of districts
write.csv(matched.buildings.summary.dt, file = "./Data Sources CSV/Matching/summary_buildings_different_methods.csv")
write.csv(matched.buildings.dt, file = "./Data Sources CSV/Matching/matching_building_results.csv")
write.csv(c2.matched.buildings, "./Data Sources CSV/Matching/cohort_2_matched_buildings.csv")
write.csv(c3.matched.buildings, "./Data Sources CSV/Matching/cohort_3_matched_buildings.csv")
```
Welch Two Sample t-test
data: matched.buildings.dt[treated == T & cohort == 2]$change and matched.buildings.dt[treated == F & cohort == 2]$change
t = 1.3105, df = 117.42, p-value = 0.1926
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-2.891999 14.206753
sample estimates:
mean of x mean of y
-3.693443 -9.350820
Welch Two Sample t-test
data: matched.buildings.dt[treated == T & cohort == 3]$change and matched.buildings.dt[treated == F & cohort == 3]$change
t = -1.9733, df = 203.31, p-value = 0.04982
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-12.40086040 -0.00496484
sample estimates:
mean of x mean of y
-8.022330 -1.819417
Paired t-test
data: matched.buildings.dt[treated == T & cohort == 2]$change and matched.buildings.dt[treated == F & cohort == 2]$change
t = 1.5897, df = 60, p-value = 0.1171
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.461029 12.775783
sample estimates:
mean of the differences
5.657377
Paired t-test
data: matched.buildings.dt[treated == T & cohort == 3]$change and matched.buildings.dt[treated == F & cohort == 3]$change
t = -1.837, df = 102, p-value = 0.06912
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-12.9003305 0.4945052
sample estimates:
mean of the differences
-6.202913