# tinytable theme that removes the centering wrapper from Typst output.
#
# Passes `x` through tinytable::theme_tinytable(), then registers a finalizer
# via tinytable::style_tt(). For tables whose output format is "typst", the
# finalizer deletes the lines that open (`#align(center, [`) and close
# (`]) // end align`) the alignment block; any other format is returned
# unchanged. Returns the styled table object.
noalign <- function(x) {
  themed <- tinytable::theme_tinytable(x)

  strip_align_wrapper <- function(table) {
    if (table@output == "typst") {
      rows <- unlist(strsplit(table@table_string, "\\n"))
      # TRUE for the opening and closing lines of the align block
      is_wrapper <- grepl("^\\s*#align\\(center, \\[\\s*$|^\\s*\\]\\) // end align\\s*$", rows)
      table@table_string <- paste(rows[!is_wrapper], collapse = "\n")
    }
    table
  }

  tinytable::style_tt(themed, finalize = strip_align_wrapper)
}
options(tinytable_tt_theme = noalign)
Event-study DiD with staggered treatment design
This is another post in my series of attempts to learn the data.table package for R and to get more comfortable with base R graphics. Today, I reimplement the data generating process and one of the models in this excellent blog post by Andrew Baker: How to create relative time indicators. I also strongly recommend watching Andrew’s talk on Difference-in-Differences staggered treatment designs.
For this exercise, we will only use two libraries:
library(data.table)
library(fixest)
package ‘data.table’ was built under R version 4.5.2
package ‘fixest’ was built under R version 4.5.2
As in the original post, the data generating process includes unit and time fixed effects, as well as unit-specific treatment effects.
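The outcome for firm $i$ in year $t$ is $y_{it} = \alpha_i + \alpha_t + \tau_{it} + \epsilon_{it}$, where $\alpha_i$ and $\alpha_t$ are the unit and time fixed effects. The treatment effect $\tau_{it}$ is cumulative and is equal to the sum of each firm-specific $\mu_i$ since the start of the treatment period.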
We have 1000 firms split into 4 cohorts which receive treatment in 1986, 1992, 1998, and 2004.
# Simulate a balanced firm-by-year panel with staggered treatment adoption.
#
# Takes no arguments. 1000 firms are observed every year from 1980 to 2010.
# Each firm is assigned at random to one of 40 states, and the 40 states are
# randomly split into four cohorts of 10 that start treatment in 1986, 1992,
# 1998 or 2004.
#
# Returns a data.table keyed by (unit, year) holding the simulated components
# (unit_fe, year_fe, mu, error), the treatment indicator `treat`, the
# per-period effect `tau`, its running sum `tau_cum`, and the outcome
# `dep_var = unit_fe + year_fe + tau_cum + error`.
#
# All draws are random; call set.seed() beforehand for reproducible output.
make_data <- function() {
  # unit fixed effects
  unit <- data.table(
    unit = 1:1000,
    unit_fe = rnorm(1000, 0, 1),                # unit fe
    state = sample(1:40, 1000, replace = TRUE), # state id
    mu = rnorm(1000, .3, .2)                    # treatment effect
  )

  # year fixed effects
  year <- data.table(
    year = 1980:2010,
    year_fe = rnorm(31, 0, 1)
  )

  # treatment groups by state: a random permutation of the states is cut
  # into four blocks of 10, one block per cohort year
  treat_taus <- data.table(
    state = sample(1:40, 40),
    cohort_year = sort(rep(c(1986, 1992, 1998, 2004), 10))
  )

  # full interaction of unit X year
  out <- CJ(
    unit = 1:1000,
    year = 1980:2010
  )
  out <- merge(out, unit, by = "unit")
  out <- merge(out, year, by = "year")
  out <- merge(out, treat_taus, by = "state")

  # error term, treatment indicator, and treatment effects
  # (.N is the number of rows, so the draw count always matches the panel)
  out[, error := rnorm(.N, 0, .5)][
    , treat := fifelse(year >= cohort_year, 1, 0)][
    , tau := fifelse(treat == 1, mu, 0)]

  # cumulative treatment effects; rows must be in (unit, year) order first
  # so that cumsum() accumulates over time within each firm
  setkey(out, unit, year)
  out[, tau_cum := cumsum(tau), by = "unit"]

  # dependent variable
  out[, dep_var := unit_fe + year_fe + tau_cum + error]

  return(out)
}

# Plot data from a single simulation
dat <- make_data()

# empty plot window
plot(0,
     type = "n",
     ylim = c(-8, 20),
     xlim = c(1980, 2010),
     xlab = "",
     ylab = "Outcome variable")

# one line per firm
# The colour does not depend on the firm, so it is computed once; `alpha.f`
# is spelled out instead of relying on partial matching of `alpha`.
firm_col <- adjustcolor("grey", alpha.f = 0.1)
for (firm_id in unique(dat$unit)) {
  firm <- dat[unit == firm_id]
  lines(x = firm$year,
        y = firm$dep_var,
        col = firm_col)
}

# means by treatment cohort
cohorts <- dat[, .(dep_var = mean(dep_var)), by = .(cohort_year, year)]

# cohort start years, named by the colour used to draw each cohort
years <- c(
  "#CC6677" = 1986,
  "#332288" = 1992,
  "#DDCC77" = 1998,
  "#117733" = 2004
)

# for every cohort: a vertical line at its treatment start and its mean path
for (k in seq_along(years)) {
  start_year <- years[[k]]
  cohort_col <- names(years)[k]
  cohort_path <- cohorts[cohort_year == start_year]
  abline(v = start_year,
         col = cohort_col,
         lwd = 3)
  lines(x = cohort_path$year,
        y = cohort_path$dep_var,
        col = cohort_col,
        lwd = 3)
}
# simulate data
dat <- make_data()

# drop 2004 treatment: keep only the years before the last cohort is treated
dat <- dat[year <= 2003]

# years since/to treatment (negative values are years before treatment starts)
dat[, rel_year := year - cohort_year]

# range of relative time observed for the 2004 cohort
min_year <- dat[cohort_year == 2004, min(rel_year)]
max_year <- dat[cohort_year == 2004, max(rel_year)]

# identification requires dropping two dummies
# Disabled alternative coding: pool `min_year` and -1 into one "omitted"
# reference level.
# dat[, rel_year := fifelse(!rel_year %in% c(min_year, -1), as.character(rel_year), "omitted")][
#   , rel_year := factor(rel_year)][
#   , rel_year := relevel(rel_year, ref = "omitted")]
#
# NOTE(review): the active coding below uses the smallest relative year as the
# only reference level; fixest then reports dropping rel_year12 to rel_year17
# for collinearity. Confirm this is the intended normalization.
dat[, rel_year := factor(rel_year, levels = sort(unique(rel_year)))]

# regression model: relative-time dummies with unit and year fixed effects
f <- dep_var ~ rel_year | unit + year
mod <- feols(f, data = dat)

# clean results: one row per coefficient, with the relative year recovered
# from the coefficient name
out <- data.table(term = names(coef(mod)), estimate = coef(mod))
out[, term := as.numeric(gsub(".*year", "", term))]
out = out[term %in% -5:5]
Key: <unit, year>
state year unit unit_fe mu year_fe cohort_year
<int> <int> <int> <num> <num> <num> <num>
1: 10 1980 1 -1.3501770 0.5119659 0.96948201 1986
2: 10 1981 1 -1.3501770 0.5119659 -0.54153906 1986
3: 10 1982 1 -1.3501770 0.5119659 2.41358183 1986
4: 10 1983 1 -1.3501770 0.5119659 -1.27261946 1986
5: 10 1984 1 -1.3501770 0.5119659 -0.56674471 1986
---
23996: 11 1999 1000 0.8193909 0.2379476 -0.17129098 2004
23997: 11 2000 1000 0.8193909 0.2379476 -2.04669219 2004
23998: 11 2001 1000 0.8193909 0.2379476 0.03897099 2004
23999: 11 2002 1000 0.8193909 0.2379476 -0.51422437 2004
24000: 11 2003 1000 0.8193909 0.2379476 1.77997749 2004
error treat tau tau_cum dep_var rel_year
<num> <num> <num> <num> <num> <num>
1: -0.81785663 0 0 0 -1.1985517 -6
2: -0.18693056 0 0 0 -2.0786467 -5
3: -0.89572831 0 0 0 0.1676765 -4
4: -0.24003381 0 0 0 -2.8628303 -3
5: 0.56185433 0 0 0 -1.3550674 -2
---
23996: 0.02316430 0 0 0 0.6712642 -5
23997: 0.68500934 0 0 0 -0.5422920 -4
23998: 0.07155696 0 0 0 0.9299188 -3
23999: -0.04203869 0 0 0 0.2631278 -2
24000: 0.01581848 0 0 0 2.6151868 -1
Key: <unit, year>
Index: <cohort_year>
state year unit unit_fe mu year_fe cohort_year
<int> <int> <int> <num> <num> <num> <num>
1: 10 1980 1 -1.3501770 0.5119659 0.96948201 1986
2: 10 1981 1 -1.3501770 0.5119659 -0.54153906 1986
3: 10 1982 1 -1.3501770 0.5119659 2.41358183 1986
4: 10 1983 1 -1.3501770 0.5119659 -1.27261946 1986
5: 10 1984 1 -1.3501770 0.5119659 -0.56674471 1986
---
23996: 11 1999 1000 0.8193909 0.2379476 -0.17129098 2004
23997: 11 2000 1000 0.8193909 0.2379476 -2.04669219 2004
23998: 11 2001 1000 0.8193909 0.2379476 0.03897099 2004
23999: 11 2002 1000 0.8193909 0.2379476 -0.51422437 2004
24000: 11 2003 1000 0.8193909 0.2379476 1.77997749 2004
error treat tau tau_cum dep_var rel_year
<num> <num> <num> <num> <num> <fctr>
1: -0.81785663 0 0 0 -1.1985517 -6
2: -0.18693056 0 0 0 -2.0786467 -5
3: -0.89572831 0 0 0 0.1676765 -4
4: -0.24003381 0 0 0 -2.8628303 -3
5: 0.56185433 0 0 0 -1.3550674 -2
---
23996: 0.02316430 0 0 0 0.6712642 -5
23997: 0.68500934 0 0 0 -0.5422920 -4
23998: 0.07155696 0 0 0 0.9299188 -3
23999: -0.04203869 0 0 0 0.2631278 -2
24000: 0.01581848 0 0 0 2.6151868 -1
The variables 'rel_year12', 'rel_year13', 'rel_year14', 'rel_year15',
'rel_year16' and 'rel_year17' have been removed because of collinearity (see
$collin.var).
Plot results
# event study estimates: one point per relative-time coefficient
plot(out$term,
     out$estimate,
     pch = 20,
     xlim = c(-5, 5),
     ylim = c(-.5, 1.5),
     xlab = "Relative time",
     ylab = "Estimate")

# truth: zero before treatment, then .3 more for every treated period
truth <- data.table(term = -5:5)
truth[, estimate := fifelse(term >= 0, (term + 1) * .3, 0)]

# overlay the true path as a dashed line
lines(truth$term,
      truth$estimate,
      col = "#CC6677",
      lty = "dashed")
#     term estimate
<int> <num>
1: -5 0.0
2: -4 0.0
3: -3 0.0
4: -2 0.0
5: -1 0.0
6: 0 0.3
7: 1 0.6
8: 2 0.9
9: 3 1.2
10: 4 1.5
11: 5 1.8