# tinytable theme: applies `theme_tinytable()` to `x` and registers a
# finalizer that removes the `#align(center, [` / `]) // end align` wrapper
# lines from the table string when the output format is "typst".
noalign <- function(x) {
    # Finalizer passed to `style_tt()`. Tables whose output is not "typst"
    # are returned unchanged.
    strip_align <- function(table) {
        if (table@output == "typst") {
            lines <- unlist(strsplit(table@table_string, "\\n"))
            # Flag the opening and closing lines of the alignment wrapper.
            is_wrapper <- grepl("^\\s*#align\\(center, \\[\\s*$|^\\s*\\]\\) // end align\\s*$", lines)
            table@table_string <- paste(lines[!is_wrapper], collapse = "\n")
        }
        table
    }
    themed <- tinytable::theme_tinytable(x)
    tinytable::style_tt(themed, finalize = strip_align)
}
# Store `noalign` in the `tinytable_tt_theme` option.
options(tinytable_tt_theme = noalign)

Which R packages do scientists use?

# Packages attached by this notebook.
pkgs = c(
    "renv",
    "ggplot2",
    "anytime",
    "DT",
    "here",
    "parallel",
    "patchwork",
    "data.table"
)
# Attach with library() rather than require(): library() stops with an error
# when a package is not installed, whereas require() only returns FALSE and
# lets the script fail later with a less informative message.
invisible(lapply(pkgs, library, character.only = TRUE))
theme_set(theme_minimal())
dat = readRDS("usage.rds")
# Hard-coded counts; presumably obtained from the commented-out Sys.glob()
# calls below, which need the local archive -- TODO confirm.
n_projects = 4539
n_scripts = 19162
# n_projects = length(Sys.glob("~/Dropbox/research/dataverse/archive/*"))
# n_scripts = length(Sys.glob("~/Dropbox/research/dataverse/archive/**/*R"))

I downloaded `r n_scripts` R scripts from `r n_projects` projects hosted by The Dataverse Project. This notebook reports usage statistics for R packages in this large sample of real-life scientific applications.

To download data from Dataverse, I adapted a script from Trisovic et al. (2022) and wrote original Python code. Then, I used the `renv::dependencies()` function from the `renv` package for R (Ushey, 2022) to extract the names of R packages used in each script.[1]

WARNING: This was a very quick job and I did very little quality control on the data. Please take all this with a grain of salt.

Trisovic, A., Lau, M.K., Pasquier, T. et al. A large-scale study on research code quality and execution. Sci Data 9, 60 (2022). https://doi.org/10.1038/s41597-022-01143-6
Ushey K (2022). renv: Project Environments. R package version 0.16.0, https://rstudio.github.io/renv/.

Number of projects and packages over time

# Drop observations at or before the cutoff. The cutoff is given the same
# 'tzone' attribute as `dat$date`: this does not change the instant being
# compared, but it avoids the "'tzone' attributes are inconsistent" warning
# that R raises when comparing POSIXct values with different time zone
# attributes.
cutoff = anytime("2013-12-31 UTC")
attr(cutoff, "tzone") = attr(dat$date, "tzone")
dat = dat[date > cutoff]
# Map every observation to the 15th of its month so rows can be grouped by month.
dat[, month := anytime(format(date, "%Y-%m-15"))]

# Number of distinct projects (dataset_id) per month.
projects = dat[, .(N = uniqueN(dataset_id)), by = "month"]
# Number of rows of `dat` per month.
packages = dat[, .N, by = "month"]

p1 = ggplot(projects, aes(month, N)) + 
    geom_line() +
    labs(x = "", y = "", title = "Projects")
p2 = ggplot(packages, aes(month, N)) + 
    geom_line() +
    labs(x = "", y = "", title = "Packages")
# Combine the two plots (the `+` on plot objects is presumably patchwork's).
p1 + p2
       dataset_id   Package                date
            <int>    <char>              <POSc>
    1:      46760       car 2015-07-29 11:32:37
    2:      46890      epiR 2016-03-11 17:54:53
    3:      46890       irr 2016-03-11 17:54:53
    4:      46890    austin 2016-03-11 17:54:53
    5:      46935  MCMCpack 2016-03-11 17:33:12
   ---                                         
41818:    6789403   sjstats 2022-12-06 10:41:05
41819:    6789403     srvyr 2022-12-06 10:41:05
41820:    6789403 tidyverse 2022-12-06 10:41:05
41821:    6789403   weights 2022-12-06 10:41:05
41822:    6789403   writexl 2022-12-06 10:41:05
                                                                                                                                                                                                                                                                                      dataset_citation
                                                                                                                                                                                                                                                                                                <char>
    1:    Simons, Joseph; Mallinson, Daniel J., 2015, ""Replication data for: Party Control and Perverse Effects in Majority-Minority Districting: Replication Challenges When Using DW-NOMINATE"", https://doi.org/10.7910/DVN/28763, Harvard Dataverse, V1, UNF:6:COakdf2t21U/4QgnYTB5cQ== [fileUNF]
    2: Dolezal, Martin; Ennser-Jedenastik, Laurenz; Müller, Wolfgang C.; Winkler, Anna Katharina, 2016, ""Replication data for: Analyzing Manifestos in their Electoral Context: A New Approach Applied to Austria, 2002â\u0080\u00932008"", https://doi.org/10.7910/DVN/27864, Harvard Dataverse, V1
    3: Dolezal, Martin; Ennser-Jedenastik, Laurenz; Müller, Wolfgang C.; Winkler, Anna Katharina, 2016, ""Replication data for: Analyzing Manifestos in their Electoral Context: A New Approach Applied to Austria, 2002â\u0080\u00932008"", https://doi.org/10.7910/DVN/27864, Harvard Dataverse, V1
    4: Dolezal, Martin; Ennser-Jedenastik, Laurenz; Müller, Wolfgang C.; Winkler, Anna Katharina, 2016, ""Replication data for: Analyzing Manifestos in their Electoral Context: A New Approach Applied to Austria, 2002â\u0080\u00932008"", https://doi.org/10.7910/DVN/27864, Harvard Dataverse, V1
    5:                                           Schwarz, Daniel; Traber, Denise; Benoit, Kenneth, 2016, ""Replication data for: Estimating Intra-Party Preferences: Comparing Speeches to Votes"", https://doi.org/10.7910/DVN/27702, Harvard Dataverse, V1, UNF:6:lzjSYjrMMhScH7QDurVAAw== [fileUNF]
   ---                                                                                                                                                                                                                                                                                                
41818:                     Stefkovics, Adam, 2022, ""Global warming vs. climate change frames. Revisiting framing effects based on new experimental evidence collected in 30 European countries"", https://doi.org/10.7910/DVN/OYWFB9, Harvard Dataverse, V1, UNF:6:q7NQHWV34EwjSer8wMmT+g== [fileUNF]
41819:                     Stefkovics, Adam, 2022, ""Global warming vs. climate change frames. Revisiting framing effects based on new experimental evidence collected in 30 European countries"", https://doi.org/10.7910/DVN/OYWFB9, Harvard Dataverse, V1, UNF:6:q7NQHWV34EwjSer8wMmT+g== [fileUNF]
41820:                     Stefkovics, Adam, 2022, ""Global warming vs. climate change frames. Revisiting framing effects based on new experimental evidence collected in 30 European countries"", https://doi.org/10.7910/DVN/OYWFB9, Harvard Dataverse, V1, UNF:6:q7NQHWV34EwjSer8wMmT+g== [fileUNF]
41821:                     Stefkovics, Adam, 2022, ""Global warming vs. climate change frames. Revisiting framing effects based on new experimental evidence collected in 30 European countries"", https://doi.org/10.7910/DVN/OYWFB9, Harvard Dataverse, V1, UNF:6:q7NQHWV34EwjSer8wMmT+g== [fileUNF]
41822:                     Stefkovics, Adam, 2022, ""Global warming vs. climate change frames. Revisiting framing effects based on new experimental evidence collected in 30 European countries"", https://doi.org/10.7910/DVN/OYWFB9, Harvard Dataverse, V1, UNF:6:q7NQHWV34EwjSer8wMmT+g== [fileUNF]
            month
           <POSc>
    1: 2015-07-15
    2: 2016-03-15
    3: 2016-03-15
    4: 2016-03-15
    5: 2016-03-15
   ---           
41818: 2022-12-15
41819: 2022-12-15
41820: 2022-12-15
41821: 2022-12-15
41822: 2022-12-15
'tzone' attributes are inconsistent

Usage statistics for R packages loaded at least twice

# Build the usage table in one chained expression:
#   1. collapse to one row per (dataset_id, Package) so a package counts
#      only once per project;
#   2. count rows per package;
#   3. sort by decreasing count;
#   4. keep packages counted more than once.
dat_count = dat[
    , .(date = min(date)), by = c("dataset_id", "Package")
][
    , .(`Number of times loaded` = .N), by = "Package"
][
    order(-`Number of times loaded`)
][
    `Number of times loaded` > 1
]

DT::datatable(dat_count, options = list(pageLength = 50), rownames = FALSE, width = 300)
  1. renv was not able to parse every script.