Event-study DiD with staggered treatment design

This is another post in my series of attempts to learn the data.table package for R and to get more comfortable with base R graphics. Today, I reimplement the data generating process and one of the models in this excellent blog post by Andrew Baker: How to create relative time indicators. I also strongly recommend watching Andrew’s talk on Difference-in-Differences staggered treatment designs.

For this exercise, we will only use two libraries:

library(data.table)
library(fixest)
package ‘data.table’ was built under R version 4.5.2
package ‘fixest’ was built under R version 4.5.2

As in the original post, the data generating process includes unit and time fixed effects, as well as unit-specific treatment effects.

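$$
\begin{aligned}
y_{it} &= \alpha_i + \alpha_t + \tau_{it} + \varepsilon_{it} \\
\alpha_i,\ \alpha_t &\sim N(0,\ 1) \\
\varepsilon_{it} &\sim N(0,\ 0.5^2) \\
\mu_{it} &\sim N(0.3,\ 0.2^2)
\end{aligned}
$$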

where $\tau_{it}$ is the cumulative sum of the firm-specific $\mu_{it}$ over all periods since the start of the treatment period.

We have 1000 firms split into 4 cohorts which receive treatment in 1986, 1992, 1998, and 2004.

# Simulate one panel for the staggered-treatment event study.
#
# Every unit (firm) gets a fixed effect, a state, and its own per-period
# treatment effect `mu`; every year gets a fixed effect. States are randomly
# assigned to equally sized treatment cohorts. Once a unit's cohort is treated,
# its treatment effect grows by `mu` each year (treatment year included).
#
# Arguments (the defaults reproduce the design described in the post):
#   n_units       number of units
#   years         calendar years covered by the panel
#   n_states      number of states; must be a multiple of length(cohort_years)
#   cohort_years  first treated year of each cohort
#
# Returns a data.table keyed by (unit, year) with columns state, year, unit,
# unit_fe, mu, year_fe, cohort_year, error, treat, tau, tau_cum and dep_var.
make_data = function(n_units      = 1000,
                     years        = 1980:2010,
                     n_states     = 40,
                     cohort_years = c(1986, 1992, 1998, 2004)) {

  stopifnot(
    n_units >= 1,
    n_states >= 1,
    length(years) >= 1,
    length(cohort_years) >= 1,
    n_states %% length(cohort_years) == 0
  )

  # NOTE: the random draws below are made in a fixed order (unit_fe, state,
  # mu, year_fe, cohort assignment, error) so that results are reproducible
  # under set.seed(); do not reorder them.

  # unit fixed effects
  unit = data.table(
    unit    = seq_len(n_units),
    unit_fe = rnorm(n_units, 0, 1),                       # unit fe
    state   = sample.int(n_states, n_units, replace=TRUE), # state id
    mu      = rnorm(n_units, .3, .2)                      # treatment effect
  )

  # year fixed effects
  year = data.table(
    year    = years,
    year_fe = rnorm(length(years), 0, 1)
  )

  # treatment groups by state: a random permutation of the states is cut into
  # equally sized cohorts, earliest cohort first
  treat_taus = data.table(
    state       = sample.int(n_states),
    cohort_year = rep(sort(cohort_years),
                      each = n_states / length(cohort_years))
  )

  # full interaction of unit X year
  out = CJ(
    unit = seq_len(n_units),
    year = years
  )
  out = merge(out, unit, by="unit")
  out = merge(out, year, by="year")
  out = merge(out, treat_taus, by="state")

  # error term, treatment indicator, and treatment effects
  # (.N is the number of unit-year rows, so the draw always matches the panel)
  out[, error := rnorm(.N, 0, .5)][
      , treat := fifelse(year >= cohort_year, 1, 0)][
      , tau   := fifelse(treat == 1, mu, 0)]

  # cumulative treatment effects; rows must be in year order within each unit
  # for cumsum() to accumulate over time
  setkey(out, unit, year)
  out[, tau_cum := cumsum(tau), by="unit"]

  # dependent variable
  out[, dep_var := unit_fe + year_fe + tau_cum + error]

  return(out)

}

Plot data from a single simulation

dat = make_data()

# empty plot window
plot(0,
     type = "n",
     ylim = c(-8, 20),
     xlim = c(1980, 2010),
     xlab = "",
     ylab = "Outcome variable")

# one line per firm: iterate over the firms actually present in `dat` (one
# subset per firm) instead of assuming units 1..1000
firm_col = adjustcolor("grey", alpha.f=.1)
for (firm in split(dat, by="unit")) {
  lines(x   = firm$year,
        y   = firm$dep_var,
        col = firm_col)
}

# means by treatment cohort
cohorts = dat[, .(dep_var = mean(dep_var)), by=.(cohort_year, year)]

# treatment years, named by the colour used to draw each cohort
years <- c(
  "#CC6677" = 1986,
  "#332288" = 1992,
  "#DDCC77" = 1998,
  "#117733" = 2004
) 

# for each cohort: a vertical line at its treatment year and its mean outcome
# path, both in the cohort's colour
for (k in seq_along(years)) {
  cohort_col  = names(years)[k]
  cohort_mean = cohorts[cohort_year == years[[k]]]
  abline(v   = years[[k]],
         col = cohort_col,
         lwd = 3)
  lines(x   = cohort_mean$year,
        y   = cohort_mean$dep_var,
        col = cohort_col,
        lwd = 3)
}
# simulate data
dat = make_data()

# drop 2004 treatment
dat = dat[year <= 2003]

# years since/to treatment
dat[, rel_year := year - cohort_year]
min_year = dat[cohort_year==2004, min(rel_year)]
max_year = dat[cohort_year==2004, max(rel_year)]

# Identification requires omitting reference periods explicitly.
#
# rel_year = year - cohort_year, the cohort is absorbed by the unit fixed
# effects, and all cohort years are the same number of years apart (`gap`).
# Hence (a) a linear trend in rel_year and (b) a separate level shift for each
# residue class of rel_year modulo `gap` are collinear with the unit and year
# fixed effects. If only one level is dropped, feols() resolves the remaining
# collinearity by removing the *last* relative-time dummies, i.e. it
# normalises on post-treatment periods where the true effect is not zero.
#
# We therefore omit pre-treatment periods only: the `gap` earliest relative
# years (one per residue class) plus -1, the usual event-study baseline.
cohort_gap = unique(diff(sort(unique(dat$cohort_year))))
stopifnot(length(cohort_gap) == 1)
ref_years = c(min_year + seq_len(cohort_gap) - 1, -1)
stopifnot(all(ref_years < 0), !anyDuplicated(ref_years))

# collapse the reference periods into a single "omitted" level and make it the
# first factor level, which is the one left out of the regression
dat[, rel_year := fifelse(rel_year %in% ref_years,
                          "omitted",
                          as.character(rel_year))]
dat[, rel_year := relevel(factor(rel_year), ref="omitted")]

# regression model
f = dep_var ~ rel_year | unit + year
mod = feols(f, data=dat)

# clean results: coefficient names look like "rel_year-5"; keep the number.
# The estimate for -1 is absent because it is a reference period (zero by
# construction).
out = data.table("term" = names(coef(mod)), "estimate" = coef(mod))
out = out[ , term := as.numeric(gsub(".*year", "", term))]
out = out[term %in% -5:5][order(term)]
Key: <unit, year>
       state  year  unit    unit_fe        mu    year_fe cohort_year
       <int> <int> <int>      <num>     <num>      <num>       <num>
    1:    14  1980     1  0.5423732 0.3423013 -0.3103770        1998
    2:    14  1981     1  0.5423732 0.3423013  0.5439571        1998
    3:    14  1982     1  0.5423732 0.3423013  0.8557491        1998
    4:    14  1983     1  0.5423732 0.3423013  1.5691000        1998
    5:    14  1984     1  0.5423732 0.3423013  1.9404050        1998
   ---                                                              
23996:    40  1999  1000 -1.5975231 0.2912230 -1.3023333        2004
23997:    40  2000  1000 -1.5975231 0.2912230 -0.4422467        2004
23998:    40  2001  1000 -1.5975231 0.2912230 -0.7839787        2004
23999:    40  2002  1000 -1.5975231 0.2912230  1.2958910        2004
24000:    40  2003  1000 -1.5975231 0.2912230  0.2846939        2004
             error treat   tau tau_cum    dep_var rel_year
             <num> <num> <num>   <num>      <num>    <num>
    1:  0.68724552     0     0       0  0.9192417      -18
    2:  0.35444629     0     0       0  1.4407765      -17
    3:  0.25401724     0     0       0  1.6521395      -16
    4:  0.76936151     0     0       0  2.8808347      -15
    5: -0.06761989     0     0       0  2.4151583      -14
   ---                                                    
23996: -0.29560449     0     0       0 -3.1954609       -5
23997:  0.22430993     0     0       0 -1.8154598       -4
23998:  0.70907348     0     0       0 -1.6724283       -3
23999:  0.05616511     0     0       0 -0.2454670       -2
24000: -0.25777904     0     0       0 -1.5706082       -1
Key: <unit, year>
Index: <cohort_year>
       state  year  unit    unit_fe        mu    year_fe cohort_year
       <int> <int> <int>      <num>     <num>      <num>       <num>
    1:    14  1980     1  0.5423732 0.3423013 -0.3103770        1998
    2:    14  1981     1  0.5423732 0.3423013  0.5439571        1998
    3:    14  1982     1  0.5423732 0.3423013  0.8557491        1998
    4:    14  1983     1  0.5423732 0.3423013  1.5691000        1998
    5:    14  1984     1  0.5423732 0.3423013  1.9404050        1998
   ---                                                              
23996:    40  1999  1000 -1.5975231 0.2912230 -1.3023333        2004
23997:    40  2000  1000 -1.5975231 0.2912230 -0.4422467        2004
23998:    40  2001  1000 -1.5975231 0.2912230 -0.7839787        2004
23999:    40  2002  1000 -1.5975231 0.2912230  1.2958910        2004
24000:    40  2003  1000 -1.5975231 0.2912230  0.2846939        2004
             error treat   tau tau_cum    dep_var rel_year
             <num> <num> <num>   <num>      <num>   <fctr>
    1:  0.68724552     0     0       0  0.9192417      -18
    2:  0.35444629     0     0       0  1.4407765      -17
    3:  0.25401724     0     0       0  1.6521395      -16
    4:  0.76936151     0     0       0  2.8808347      -15
    5: -0.06761989     0     0       0  2.4151583      -14
   ---                                                    
23996: -0.29560449     0     0       0 -3.1954609       -5
23997:  0.22430993     0     0       0 -1.8154598       -4
23998:  0.70907348     0     0       0 -1.6724283       -3
23999:  0.05616511     0     0       0 -0.2454670       -2
24000: -0.25777904     0     0       0 -1.5706082       -1
The variables 'rel_year12', 'rel_year13', 'rel_year14', 'rel_year15',
'rel_year16' and 'rel_year17' have been removed because of collinearity (see
$collin.var).

Plot results

# Scatter the estimated event-study coefficients against relative time.
with(out, plot(
  x = term, y = estimate,
  pch  = 20,
  xlim = c(-5, 5),
  ylim = c(-.5, 1.5),
  xlab = "Relative time",
  ylab = "Estimate"
))

# True average effect for comparison: zero before treatment, then growing by
# the mean per-period effect (.3) each period from relative time 0 onwards.
rel_time <- -5:5
truth <- data.table(
  term     = rel_time,
  estimate = fifelse(rel_time >= 0, (rel_time + 1) * .3, 0)
)
lines(truth$term, truth$estimate, col = "#CC6677", lty = "dashed")
     term estimate
    <int>    <num>
 1:    -5      0.0
 2:    -4      0.0
 3:    -3      0.0
 4:    -2      0.0
 5:    -1      0.0
 6:     0      0.3
 7:     1      0.6
 8:     2      0.9
 9:     3      1.2
10:     4      1.5
11:     5      1.8