# tinytable theme: apply the default tinytable theme, then register a finalize
# hook that, for Typst output only, removes the lines that open and close the
# `#align(center, [` ... `]) // end align` wrapper from the rendered string.
# Tables with any other output format pass through the hook unchanged.
noalign <- function(x) {
  strip_align <- function(table) {
    if (table@output == "typst") {
      rendered <- unlist(strsplit(table@table_string, "\\n"))
      # TRUE for the wrapper's opening line and for its closing line.
      is_wrapper <- grepl("^\\s*#align\\(center, \\[\\s*$|^\\s*\\]\\) // end align\\s*$", rendered)
      table@table_string <- paste(rendered[!is_wrapper], collapse = "\n")
    }
    table
  }
  themed <- tinytable::theme_tinytable(x)
  styled <- tinytable::style_tt(themed, finalize = strip_align)
  styled
}
options(tinytable_tt_theme = noalign)
Which R packages do scientists use?
# Packages attached for this notebook, in attach order.
pkgs <- c(
  "renv", "ggplot2", "anytime", "DT",
  "here", "parallel", "patchwork", "data.table"
)
# Attach each package; the result is a named logical vector (TRUE = attached).
vapply(pkgs, require, logical(1), character.only = TRUE)

# Default ggplot2 theme for every figure that follows.
theme_set(theme_minimal())

# Package-usage records, read from a file in the working directory.
dat <- readRDS("usage.rds")

# Hard-coded sample sizes quoted in the text.
n_projects <- 4539
n_scripts <- 19162
# n_projects = length(Sys.glob("~/Dropbox/research/dataverse/archive/*"))
# n_scripts = length(Sys.glob("~/Dropbox/research/dataverse/archive/**/*R"))
I downloaded `r n_scripts` R scripts from `r n_projects` projects hosted by the Dataverse Project. This notebook reports usage statistics for R packages in this large sample of real-life scientific applications.
To download data from Dataverse, I adapted a script from Trisovic et al. (2022) and wrote original Python code. Then, I used the `renv::dependencies()` function from the `renv` package for R (Ushey, 2022) to extract the names of R packages used in each script.[1]
WARNING: This was a very quick job and I did very little quality control on the data. Please take all this with a grain of salt.
Trisovic, A., Lau, M.K., Pasquier, T. et al. A large-scale study on research code quality and execution. Sci Data 9, 60 (2022). https://doi.org/10.1038/s41597-022-01143-6
Ushey K (2022). renv: Project Environments. R package version 0.16.0, https://rstudio.github.io/renv/.
Number of projects and packages over time
# Keep records dated later than 2013-12-31, then tag each record with the
# 15th of its month so records can be grouped by calendar month.
dat <- dat[date > anytime("2013-12-31 UTC")]
dat[, month := anytime(format(date, "%Y-%m-15"))]

# Per month: number of distinct projects, and number of package records.
projects <- dat[, .(N = uniqueN(dataset_id)), by = "month"]
packages <- dat[, .N, by = "month"]

# One line chart per series; they are combined side by side further down.
p1 <- ggplot(data = projects, mapping = aes(x = month, y = N)) +
  geom_line() +
  labs(title = "Projects", x = "", y = "")
p2 <- ggplot(data = packages, mapping = aes(x = month, y = N)) +
  geom_line() +
  labs(title = "Packages", x = "", y = "")
p1 + p2
dataset_id Package date
<int> <char> <POSc>
1: 46760 car 2015-07-29 11:32:37
2: 46890 epiR 2016-03-11 17:54:53
3: 46890 irr 2016-03-11 17:54:53
4: 46890 austin 2016-03-11 17:54:53
5: 46935 MCMCpack 2016-03-11 17:33:12
---
41818: 6789403 sjstats 2022-12-06 10:41:05
41819: 6789403 srvyr 2022-12-06 10:41:05
41820: 6789403 tidyverse 2022-12-06 10:41:05
41821: 6789403 weights 2022-12-06 10:41:05
41822: 6789403 writexl 2022-12-06 10:41:05
dataset_citation
<char>
1: Simons, Joseph; Mallinson, Daniel J., 2015, ""Replication data for: Party Control and Perverse Effects in Majority-Minority Districting: Replication Challenges When Using DW-NOMINATE"", https://doi.org/10.7910/DVN/28763, Harvard Dataverse, V1, UNF:6:COakdf2t21U/4QgnYTB5cQ== [fileUNF]
2: Dolezal, Martin; Ennser-Jedenastik, Laurenz; Müller, Wolfgang C.; Winkler, Anna Katharina, 2016, ""Replication data for: Analyzing Manifestos in their Electoral Context: A New Approach Applied to Austria, 2002â\u0080\u00932008"", https://doi.org/10.7910/DVN/27864, Harvard Dataverse, V1
3: Dolezal, Martin; Ennser-Jedenastik, Laurenz; Müller, Wolfgang C.; Winkler, Anna Katharina, 2016, ""Replication data for: Analyzing Manifestos in their Electoral Context: A New Approach Applied to Austria, 2002â\u0080\u00932008"", https://doi.org/10.7910/DVN/27864, Harvard Dataverse, V1
4: Dolezal, Martin; Ennser-Jedenastik, Laurenz; Müller, Wolfgang C.; Winkler, Anna Katharina, 2016, ""Replication data for: Analyzing Manifestos in their Electoral Context: A New Approach Applied to Austria, 2002â\u0080\u00932008"", https://doi.org/10.7910/DVN/27864, Harvard Dataverse, V1
5: Schwarz, Daniel; Traber, Denise; Benoit, Kenneth, 2016, ""Replication data for: Estimating Intra-Party Preferences: Comparing Speeches to Votes"", https://doi.org/10.7910/DVN/27702, Harvard Dataverse, V1, UNF:6:lzjSYjrMMhScH7QDurVAAw== [fileUNF]
---
41818: Stefkovics, Adam, 2022, ""Global warming vs. climate change frames. Revisiting framing effects based on new experimental evidence collected in 30 European countries"", https://doi.org/10.7910/DVN/OYWFB9, Harvard Dataverse, V1, UNF:6:q7NQHWV34EwjSer8wMmT+g== [fileUNF]
41819: Stefkovics, Adam, 2022, ""Global warming vs. climate change frames. Revisiting framing effects based on new experimental evidence collected in 30 European countries"", https://doi.org/10.7910/DVN/OYWFB9, Harvard Dataverse, V1, UNF:6:q7NQHWV34EwjSer8wMmT+g== [fileUNF]
41820: Stefkovics, Adam, 2022, ""Global warming vs. climate change frames. Revisiting framing effects based on new experimental evidence collected in 30 European countries"", https://doi.org/10.7910/DVN/OYWFB9, Harvard Dataverse, V1, UNF:6:q7NQHWV34EwjSer8wMmT+g== [fileUNF]
41821: Stefkovics, Adam, 2022, ""Global warming vs. climate change frames. Revisiting framing effects based on new experimental evidence collected in 30 European countries"", https://doi.org/10.7910/DVN/OYWFB9, Harvard Dataverse, V1, UNF:6:q7NQHWV34EwjSer8wMmT+g== [fileUNF]
41822: Stefkovics, Adam, 2022, ""Global warming vs. climate change frames. Revisiting framing effects based on new experimental evidence collected in 30 European countries"", https://doi.org/10.7910/DVN/OYWFB9, Harvard Dataverse, V1, UNF:6:q7NQHWV34EwjSer8wMmT+g== [fileUNF]
month
<POSc>
1: 2015-07-15
2: 2016-03-15
3: 2016-03-15
4: 2016-03-15
5: 2016-03-15
---
41818: 2022-12-15
41819: 2022-12-15
41820: 2022-12-15
41821: 2022-12-15
41822: 2022-12-15
'tzone' attributes are inconsistent
Usage statistics for R packages loaded at least twice
# Collapse to one row per (project, package) pair so that a package loaded
# several times within a project counts once; then count projects per package,
# sort from most to least used, and keep packages counted more than once.
dat_count <- dat[, .(date = min(date)), by = c("dataset_id", "Package")][
  , .(`Number of times loaded` = .N), by = "Package"][
  order(-`Number of times loaded`)][
  `Number of times loaded` > 1]
DT::datatable(dat_count, options = list(pageLength = 50), rownames = FALSE, width = 300)
[1] `renv` was not able to parse every script.