data.table
vs. base
vs. dplyr
library(data.table) DT <- data.table(V1 = rep(1:2, 5)[-10], V2 = 1:9, V3 = c(0.5, 1.0, 1.5), V4 = rep(LETTERS[1:3], 3)) class(DT) DT |
DF <- data.frame( V1 = rep(1:2, 5)[-10], V2 = 1:9, V3 = c(0.5, 1.0, 1.5), V4 = rep(LETTERS[1:3], 3) ) class(DF) DF |
library(dplyr) TB <- tibble(V1 = rep(1:2, 5)[-10], V2 = 1:9, V3 = rep(c(0.5, 1.0, 1.5), 3), V4 = rep(LETTERS[1:3], 3)) class(TB) TB |
DT[3:4,] |
DF[3:4,] |
TB[3:4,] |
DT[!3:7,] |
DF[-(3:7),] |
slice(TB, -(3:7)) |
DT[V2 > 5] DT[V4 %chin% c("A", "C")] |
DF[TB$V2 > 5, ] DF[TB$V4 %in% c("A", "C"), ] |
filter(TB, V2 > 5) filter(TB, V4 %in% c("A", "C")) |
DT[V1 == 1 & V4 == "A"] |
DF[DF$V1 == 1 & DF$V4 == "A"] |
filter(TB, V1 == 1, V4 == "A") |
unique(DT) unique(DT, by = c("V1", "V4")) |
DF[!duplicated(DF), ] DF[!duplicated(DF[c("V1", "V4")]), ] |
distinct(TB) distinct(TB, across(c(V1, V4))) |
na.omit(DT, cols = 1:4) |
DF[complete.cases(DF[, 1:4]), ] |
tidyr::drop_na(TB, names(DF)) |
DT[sample(.N, 3)] # .N = nb of rows in DT DT[sample(.N, .N / 2)] DT[frankv(-V1, ties.method = "dense") < 2] |
DF[sample(nrow(DF), 3), ] DF[sample(nrow(DF), nrow(DF) / 2), ] DF[rank(-DF$V1, ties.method = "dense") < 2, ] |
slice_sample(TB, n = 3) slice_sample(TB, prop = 0.5) TB %>% slice_max(V1, n = 1) |
DT[V4 %like% "^B"] DT[V2 %between% c(3, 5)] DT[between(V2, 3, 5, incbounds = FALSE)] DT[V2 %inrange% list(-1:1, 1:3)] |
DF[grepl("^B", DF$V4), ] DF[DF$V2 >= 3 & DF$V2 <= 5, ] DF[DF$V2 > 3 & DF$V2 < 5, ] DF[DF$V2 %in% c(-1:1, 1:3), ] |
filter(TB, grepl("^B", V4)) filter(TB, dplyr::between(V2, 3, 5)) filter(TB, V2 > 3 & V2 < 5) filter(TB, V2 >= -1:1 & V2 <= 1:3) |
DT[[3]] DT[, 3] |
DF[[3]] DF[3] |
TB[[3]] TB[3] |
DT[, list(V2)] # data.table DT[, .(V2)] # data.table DT[, "V2"] # data.table DT[, V2] # vector DT[["V2"]] # vector |
DF["V2"]. # data frame setNames(DF$V2, DFV$V4) # vector DF[, "V2", drop = FALSE] # data frame DF[["V2"]] # vector |
select(TB, V2) # tibble pull(TB, V2, name = V4) # vector TB[, "V2"] # tibble TB[["V2"]] # vector |
DT[, .(V2, V3, V4)] DT[, list(V2, V3, V4)] DT[, V2:V4] |
DF[c("V2", "V3", "V4")] DF[, which(names(DF) %in% c("V2", "V3", "V4"))] DF[, 2:4] |
select(TB, V2, V3, V4) select(TB, V2:V4) |
DT[, !c("V2", "V3")] |
DF[ , !(names(DF) %in% c("V2", "V3"))] |
select(TB, -V2, -V3) |
cols <- c("V2", "V3") DT[, ..cols] # .. prefix means 'one-level up' DT[, !..cols] |
cols <- c("V2", "V3") DF[, cols] DF[ , !(names(DF) %in% cols)] |
cols <- c("V2", "V3") select(DF, !!cols) select(DF, -!!cols) |
cols <- paste0("V", 1:2) cols <- union("V4", names(DT)) cols <- grep("V", names(DT)) cols <- grep("3$", names(DT)) cols <- grep(".2", names(DT)) cols <- grep("^V1|X$", names(DT)) cols <- grep("^(?!V2)", names(DT), perl = TRUE) DT[, ..cols] |
DF[ , grep("^V[1-2]$", names(DF))] DF[ , c("V4", setdiff(names(DF), "V4"))] DF[ , grep("V", names(DF))] DF[ , grep("3$", names(DF))] DF[ , grep(".2", names(DF))] DF[ , c("V1", "X")] DF[ , !grepl("^V2", names(DF))] |
select(TB, num_range("V", 1:2)) select(TB, V4, everything()) select(TB, contains("V")) select(TB, ends_with("3")) select(TB, matches(".2")) select(TB, one_of(c("V1", "X"))) select(TB, -starts_with("V2")) |
DT[order(V3)] |
DF[order(DF$V3)] |
arrange(TB, V3) |
DT[order(-V3)] |
DF[order(DF$V3, decreasing = TRUE)] |
arrange(TB, desc(V3)) |
DT[order(V1, -V2)] |
DF[order(DF$V1, -DF$V2), ] |
arrange(TB, V1, desc(V2)) |
DT[, sum(V1)] # vector DT[, .(sum(V1))] # data.table DT[, .(sumV1 = sum(V1))] # data.table |
sum(DF$V1) data.frame(sumV1 = sum(DF$V1)) |
summarise(TB, sum(V1)) summarise(TB, sumV1 = sum(V1)) |
DT[, .(sum(V1), sd(V3))] |
data.frame(sum_V1 = sum(DF$V1), sd_V3 = sd(DF$V3)) |
summarise(TB, sum(V1), sd(V3)) |
DT[, .(sumv1 = sum(V1), sdv3 = sd(V3))] |
data.frame(sumv1 = sum(V1), sdv3 = sum(V3)) |
TB %>% summarise(sumv1 = sum(V1), sdv3 = sd(V3)) |
DT[1:4, sum(V1)] |
sum(DF[1:4, "V1"]) |
TB %>% slice(1:4) %>% summarise(sum(V1)) |
DT[, data.table::first(V3)] DT[, data.table::last(V3)] DT[5, V3] DT[, uniqueN(V4)] uniqueN(DT) |
DF[1, "V3"] DF[nrow(DF), "V3"] DF[5, "V3"] length(unique(DF$V4)) length(unique(DF)) |
summarise(TB, dplyr::first(V3)) summarise(TB, dplyr::last(V3)) summarise(TB, nth(V3, 5)) summarise(TB, n_distinct(V4)) n_distinct(TB) |
DT[, .N, by = V4] |
as.data.frame(table(DF$V4)) |
TB %>% group_by(V4) %>% tally() |
DT[, V1 := V1^2] DT |
DF$V1 <- DF$V1^2 DF |
TB <- TB %>% mutate(V1 = V1^2) TB |
DT[, v5 := log(V1)] |
DF$v5 <- log(DF$V1) DF |
TB <- mutate(DF, v5 = log(V1)) |
DT[, c("v6", "v7") := .(sqrt(V1), "X")] DT[, ':='(v6 = sqrt(V1), v7 = "X")] |
DF$v6 <- sqrt(DF$V1) DF$v7 <- "X" DF |
TB <- mutate(TB, v6 = sqrt(V1), v7 = "X") |
DT[, .(v8 = V3 + 1)] |
data.frame(v8 = DF$V3 + 1) |
transmute(TB, v8 = V3 + 1) |
DT[, v5 := NULL] |
DF <- DF[, !names(DF %in% "v5")] |
TB <- select(TB, -v5) |
DT[, c("v6", "v7") := NULL] |
DF <- DF[, !(names(DF) %in% c("v6", "v7"))] |
TB <- select(TB, -v6, -v7) |
cols <- c("V3") DT[, (cols) := NULL] # ! not DT[, cols := NULL] |
cols <- c("V3") DF <- DF[, !(names(DF) %in% cols)] |
cols <- c("V3") TB <- select(TB, -one_of(cols)) |
DT[V2 < 4, V2 := 0L] DT |
DF$V2 <- replace(DF$V2, DF$V2 < 4, 0L) DF |
TB <- mutate(DF, V2 = base::replace(V2, V2 < 4, 0L)) TB |
DT[, .(sumV2 = sum(V2)), by = "V4"] DT[, by = V4, .(sumV2 = sum(V2))] |
aggregate(V2 ~ V4, data = DF, FUN = sum) |
TB %>% group_by(V4) %>% summarise(sumV2 = sum(V2)) |
DT[, keyby = .(V4, V1), .(sumV2 = sum(V2))] |
aggregate(V2 ~ V4 + V1, data = DF, FUN = sum) |
TB %>% group_by(V4, V1) %>% summarise(sumV2 = sum(V2)) %>% ungroup() |
DT[, by = tolower(V4), .(sumV1 = sum(V1))] |
aggregate(V! ~ tolower(V4), data = DF, FUN = sum) |
TB %>% group_by(tolower(V4)) %>% summarise(sumV1 = sum(V1)) |
DT[, keyby = .(abc = tolower(V4)), .(sumV1 = sum(V1))] |
aggregate(V1 ~ abc, data = transform(DF, abc = tolower(V4)), FUN = sum) |
TB %>% group_by(abc = tolower(V4)) %>% summarise(sumV1 = sum(V1)) |
DT[, keyby = V4 == "A", sum(V1)] |
aggreggate(V1 ~ groupA, data = transform(DF, groupA = V4 == "A"), FUN = sum) |
TB %>% group_by(V4 == "A") %>% summarise(sum(V1)) |
DT[1:5, # i .(sumV1 = sum(V1)), # j by = V4] # by |
aggregate(V1 ~ V4, data = DF[1:5,], sum) |
TB %>% slice(1:5) %>% group_by(V4) %>% summarise(sumV1 = sum(V1)) |
DT[, n := .N, by = V1][] DT[, n := NULL] |
DF$tally <- ave(DF$V1, DF$V1, FUN = length) |
TB %>% group_by(V1) %>% add_tally() |
DT[, data.table::first(V2), by = V4] DT[, data.table::last(V2), by = V4] DT[, V2[2], by = V4] |
aggregate(V2 ~ V4, data = DF, FUN = function(X) x[1]) aggregate(V2 ~ V4, data = DF, FUN = function(x) x[length(x)]) aggregate(V2 ~ V4, data = DF, FUN = function(x) x[2]) |
TB %>% group_by(V4) %>% summarise(dplyr::first(V2)) TB %>% group_by(V4) %>% summarise(dplyr::last(V2)) TB %>% group_by(V4) %>% summarise(dplyr::nth(V2, 2)) |
DT[, lapply(.SD, max)] |
apply(DF, 2, max) |
TB %>% summarise(across(everything(), max)) |
DT[, lapply(.SD, mean), .SDcols = c("V1", "V2")] |
data.frame(mean_V1 = mean(DF$V1), mean_V2 = mean(DF$V2)) |
TB %>% summarise(across(c(V1, V2), mean)) |
DT[, by = V4, lapply(.SD, mean), .SDcols = c("V1", "V2")] DT[, by = V4, lapply(.SD, mean), .SDcols = patterns("V1|V2|Z0")] |
cols <- intersect(C("V1", "V2", "Z0"), names(DF)) aggregate(DF[cols], by = LIST(DF$V4), FUN = mean, na.rm = TRUE) |
DT[, by = V4, c(lapply(.SD, sum), lapply(.SD, mean))] |
aggregate(cbind(sumV1 = DF$V1, meanV1 = DF$V1, sumV2 = DF$V2, meanV2 = DF$V2) ~ V4, data = DF, FUN = function(x) c(sum = sum(x), mean = mean(x))) |
TB %>% group_by(V4) %>% summarise(across(everything(), list(sum = sum, mean = mean))) |
DT[, lapply(.SD, mean), .SDcols = is.numeric] foo <- function(x) {is.numeric(x) && mean(x) > 3} DT[, lapply(.SD, mean), .SDcols = foo] |
sapply(DF[sapplu(DF, is.numeric)], mean) sapply(DF[sapply(DF, function(x) is.numeric(x) && mean(x) > 3)], mean) |
TB %>% summarise(across(where(is.numeric), mean)) TB %>% summarise(across(where(~ is.numeric(.x) && mean(.x) > 3), mean)) |
DT[, lapply(.SD, rev)] |
data.frame(lapply(DF, rev)) |
TB %>% mutate(across(everything(), rev)) |
DT[, lapply(.SD, sqrt), .SDcols = V1:V2] DT[, lapply(.SD, exp), .SDcols = !"V4"] |
DF[c("V1", "V2")] <- lapply(DF[c("V1", "V2")], sqrt) DF[setdiff(names(DF), "V4")] <- lapply(DF[setdiff(names(DF), "V4")], exp) |
DT[, c("V1", "V2") := lapply(.SD, sqrt), .SDcols = c("V1", "V2")] cols <- setdiff(names(DT), "V4") DT[, (cols) := lapply(.SD, "^", 2L), .SDcols = cols] DT |
DF[c("V1", "V2")] <- lapply(DF[c("V1", "V2")], sqrt) cols <- setdiff(names(DF), "V4") DF[cols] <- lapply(DF[cols], "^", 2L) |
TB <-TB %>% mutate(across(all_of(c("V1", "V2")), sqrt)) TB <- TB %>% mutate(across(-any_of("V4"), ~ "^"(.x, 2L))) TB |
DT[, .SD - 1, .SDcols = is.numeric] DT |
data.frame(lapply(DF, function(x) if (is.numeric(x)) x - 1 else x)) DF |
TB %>% transmute(across(where(is.numeric), ~ '-'(., 1L))) TB |
DT[, (cols) := lapply(.SD, as.integer), .SDcols = is.numeric] |
DF[sapply(DF, is.numeric)] <- lapply(DF[sapply(DF, is.numeric)], as.integer) DF |
TB <- TB %>% mutate(across(where(is.numeric), as.integer)) |
DT[, by = V4, .(V1[1:2], "X")] |
DF <- do.call(rbind, by(DF, DF$V4, function(sub) { head(data.frame(V1 = sub$V1, V2 = "X"), 2) })) |
TB %>% group_by(V4) %>% slice(1:2) %>% transmute(V1 = V1, V2 = "X") |
DT[, {print(V1) print(summary(V1)) x <- V1 + sum(V2) .(A = 1:.N, B = x) }] |
print(DF$V1) print(summary(DF$V1)) x <- DF$V1 + sum(DF$V2) data.frame(A = seq_along(DF$V1), B = x) |
# |
DT[, by = V4, .(V1sum = sum(V1)) ][ V1sum > 5] |
DF <- subset(aggregate(V1 ~ V4, data = DF, FUN = sum), V1 > 5) |
TB %>% group_by(V4) %>% summarise(V1sum = sum(V1)) %>% filter(V1sum > 5) |
DT[, by = V4, .(V1sum = sum(V1))] %>% .[order(-V1sum)] |
subset(aggregate(V1 ~ V4, data = DF, FUN = sum), V1 > 5) |
TB %>% group_by(V4) %>% summarise(V1sum = sum(V1)) %>% arrange(desc(V1sum)) |
setkey(DT, V4) setindex(DT, V4) DT |
DF[order(DF$V4), ] |
TB <- arrange(TB, V4) TB |
DT["A", on = "V4"] DT[c("A", "C"), on = .(V4)] |
DF[DF$V4 == "A", ] DF[DF$V4 %in% c("A", "C"), ] |
filter(TB, V4 == "A") filter(TB, V4 %in% c("A", "C")) |
DT["B", on = "V4", mult = "first"] DT[c("B", "C"), on = "V4", mult = "first"] |
DF[DF$V4 == "B", ][1, ] |
TB %>% filter(V4 == "B") %>% slice(1) # ? |
DT["A", on = "V4", mult = "last"] |
DF[DF$V4 == "A", ][nrow(DF[DF$V4 == "A", ]), ] |
TB %>% filter(V4 == "A") %>% slice(n()) |
DT[c("A", "D"), on = "V4", nomatch = NA] DT[c("A", "D"), on = "V4", nomatch = 0] |
DF[DF$V4 %in% c("A", "D"), ] |
filter(TB, V4 %in% c("A", "D")) |
DT[c("A", "C"), sum(V1), on = "V4"] |
sum(DF$V1[DF$V4 %in% c("A", "C")]) |
TB %>% filter(V4 %in% c("A", "C")) %>% summarise(sum(V1)) |
DT["A", V1 := 0, on = "V4"] DT |
DF$V1[DF$V4 == "A"] <- 0 |
TB <- TB %>% mutate(V1 = base::replace(V1, V4 == "A", 0L)) %>% arrange(V4) TB |
DT[!"B", sum(V1), on = "V4", by = .EACHI] DT[V4 != "B", by = V4, sum(V1)] |
aggregate(V1 ~ V4, data = DF[DF$V4 != "B", ], FUN = sum) |
TB %>% filter(V4 != "B") %>% group_by(V4) %>% summarise(sum(V1)) |
setkey(DT, V4, V1) setindex(DT, V4, V1) |
DF[order(DF$V4, DF$V1)] |
TB <- arrange(DF, V4, V1) |
DT[.("C", 1), on = .(V4, V1)] DT[.(c("B", "C"), 1), on = .(V4, V1)] DT[.(c("B", "C"), 1), on = .(V4, V1), which = TRUE] |
DF[DF$V4 == "C" & DT$V1 == 1, ] DF[DF$V4 %in% c("B", "C") & DT$V1 == 1, ] which(DF$V4 %in% c("B", "C") & DT$V1 == 1) |
filter(TB, V1 == 1, V4 == "C") filter(TB, V1 == 1, V4 %in% c("B", "C")) |
setkey(DT, NULL) setindex(DT, NULL) |
DF |
TB |
set(DT, i = 1L, j = 2L, value = 3L) DT |
DF[1, 2] <- 3 DF |
TB[1, 2] <- 3L TB |
setorder(DT, V4, -V1) setorderv(DT, c("V4", "V1"), c(1, -1)) |
DF <- DF[order(TB$V4, -DF$V1), ] DF |
TB <- arrange(TB, V4, desc(V1)) |
setnames(DT, old = "V2", new = "v2") setnames(DT, old = -(c(1, 3)), new = "V2") DT |
names(DF)[names(DF) == "V2"] <- "v2" names(DF)[names(DF) == "v2"] <- "V2" DF |
TB <- rename(DF, v2 = V2) TB <- rename(DF, V2 = v2) TB |
setcolorder(DT, c("V4", "V1", "V2")) DT |
DF <- DF[, c("V4", "V1", "V2")] DF |
TB <- select(TB, V4, V1, V2) TB |
DF <- DF[, c("V1", "V2", "V4")] DF |
TB %>% relocate(V4, .after = V2) |
DT[, .SD[1], by = V4] DT[, .SD[c(1, .N)], by = V4] DT[, tail(.SD, 2), by = V4] |
DF[ave(DF$V4, DF$V4, FUN = seq_along) == 1, ] DF[ave(DF$V4, DF$V4, FUN = seq_along) %in% c(1, max(ave(DF$V4, DF$V4, FUN = seq_along)))] do.call(rbind, by(DF, DF$V4, function(x) tail(x, 2))) |
TB %>% group_by(V4) %>% slice(1) TB %>% group_by(V4) %>% slice(1, n()) TB %>% group_by(V4) %>% group_map(~ tail(.x, 2)) |
DT[, .SD[which.min(V2)], by = V4] |
DF[order(DF$V2), ][ ave(DF$V4, DF$V4, FUN = seq_along) == 1,] |
TB %>% group_by(V4) %>% arrange(V2) %>% slice(1) |
DT[, Grp := .GRP, by = .(V4, V1)][] DT[, Grp := NULL] |
ave(seq_along(DF$V4), DF$V4, DF$V1, FUN = function(X) seq_along(x)) |
TB %>% group_by(V4, V1) %>% mutate(Grp = cur_group_id()) |
DT[, .I, by = V4] DT[, .I[1], by = V4] DT[, .I[c(1, .N)], by = V4] |
sapply(split(DT, DT$V4), function(group) rownames(groupe)) sapply(split(DT, DT$V4), function(group) rownames(group)[1]) sapply(split(DT, DT$V4), function(group) c(rownames(group)[1], rownames(group)[nrow(group)])) |
TB %>% group_by(V4) %>% mutate(cur_group_rows()) %>% ungroup() TB %>% group_by(V4) %>% summarize(cur_group_rows()[1]) TB %>% group_by(V4) %>% summarize(cur_group_rows()[c(1, n())]) |
DT[, .(.(V1)), by = V4] # V1 as list DT[, .(.(.SD)), by = V4] # subsets |
tapply(DF$V1, DF$V4, function(x) list(x)) split(DF, DF$V4) |
TB %>% group_by(V4) %>% summarise(list(V1)) TB %>% group_by(V4) %>% group_nest() |
rollup(DT, .(SumV2 = sum(V2)), by = c("V1", "V4")) rollup(DT, .(SumV2 = sum(V2), .N), by = c("V1", "V4"), id = TRUE) cube(DT, .(SumV2 = sum(V2), .N), by = c("V1", "V4"), id = TRUE) groupingsets(DT, .(SumV2 = sum(V2), .N), by = c("V1", "V4"), sets = list("V1", c("V1", "V4")), id = TRUE) |
aggregate(V2 ~ V1 + V4, data = DF, FUN = sum) aggregate(cbind(V2, N = 1) ~ V1 + V4, data = DF, FUN = function(X) c(sum(x), length(x))) aggregate(cbind(V2, N = 1) ~ V1 + V4, data = DF, FUN = function(c) c(sum(x), length(x)), na.action = NULL) aggregate(V2 ~ V1, data = DF, FUN = sum) aggregate(V2 ~ V1 + V4, data = DF, FUN = sum) |
fwrite(DT, "DT.csv") |
write.csv(DF, "DF.csv", row.names = FALSE) |
readr::write_csv(TB, "TB.csv") |
fwrite(DT, "DT.txt", sep = "\t") |
write.table(DF, "DF.txt", sep = "\t", row.names = FALSE, col.names = TRUE) |
readr::write_delim(TB, "TB.txt", delim = "\t") |
fwrite(setDT(list(0, list(1:5))), "DT2.csv") |
DT2 <- data.frame(V1 = 0, V2 = list(1:5)) write.csv(DT2, "DT2.csv", row.names = FALSE) |
fread("DT.csv") fread("DT.txt", sep = "\t") |
read.csv("DF.csv") read.table("DF.txt", sep = "\t", header = TRUE) |
readr::read_csv("TB.csv") readr::read_delim("TB.txt", delim = "\t") |
fread("DT.csv", select = c("V1", "V4")) fread("DT.csv", drop = "V4") |
read.csv("DF.csv")[, c("V1", "V4")] read.csv("DF.csv")[, !(names(read.csv("DF.csv")) %in% "V4")] |
rbindlist(lapply(c("DT.csv", "DT.csv"), fread)) |
do.call(rbind, lapply(c("DF.csv", "DF.csv"), read.csv)) |
c("TB.csv", "TB.csv") %>% purrr::map_dfr(readr::read_csv) |
melt(DT, id.vars = "V4") mDT <- melt(DT, id.vars = "V4", measure.vars = c("V1", "V2"), variable.name = "Variable", value.name = "Value") |
reshape(DF, varying = setdiff(names(DF), "V4"), v.names = "value", timevar = "variable", times = setdiff(names(DF), "V4"), direction = "long") reshape(DF, varying = setdiff(names(DF), "V4"), v.names = "Value", timevar = "Variable", times = setdiff(names(DF), "V4"), direction = "long") |
tidyr::gather(TB, variable, value, -V4) mTB <- tidyr::gather(TB, key = Variable, value = Value, -V4) |
dcast(mDT, V4 ~ Variable) dcast(mDT, V4 ~ Variable, fun.aggregate = sum) dcast(mDT, V4 ~ Value > 5) |
as.data.frame.matrix(replace(table(DF$V4, DF$Variable), is.na(table(DF$V4, DF$Variable)), 0)) |
tidyr::spread(data = count(mTB, V4, Variable), key = Variable, value = n, fill = 0) |
split(DT, by = "V4") |
split(DF, DF$V4) |
group_split(TB, V4) |
vec <- c("A:a", "B:b", "C:c") tstrsplit(vec, split = ":", keep = 2L) setDT(tstrsplit(vec, split = ":"))[] |
do.call(rbind, strsplit(vec, " ")) |> as.data.frame() |
vec <- c("A:a", "B:b", "C:c") tidyr::separate(tibble(vec), vec, c("V1", "V2")) |
tables() |
# |
# |
getDTthreads() setDTthreads() |
# |
# |
shift(1:10, n = 1, fill = NA, type = "lag") shift(1:10, n = 1:2, fill = NA, type = "lag") shift(1:10, n = 1, fill = NA, type = "lead") |
c(NA, 1:9) sapply(1:2, function(n) c(rep(NA, n), seq(1, 10)[1:(length(seq(1, 10)) - n)])) c(1:9, NA) |
lag(1:10, n = 1, default = NA) purrr::map(1:2, ~lag(1:10, n = .x)) lead(1:10, n = 1, default = NA) |
rleid(rep(c("a", "b", "a"), each = 3)) rleid(rep(c("a", "b", "a"), each = 3), prefix = "G") |
unlist(lapply(rle(rep(c("a", "b", "a"), each = 3))$lengths, seq_along)) paste0("G", unlist(lapply(rle(rep(c("a", "b", "a"), each = 3))$lengths, seq_along))) |
# |
x <- c(-3:3, NA) fifelse(test = x < 0, yes = "neg", no = "pos", na = "NA") |
x <- c(-3:3, NA) result <- ifelse(is.na(x), "NA", ifelse(x < 0, "neg", "pos")) result |
x <- c(-3:3, NA) if_else(condition = x < 0, true = "neg", false = "pos", missing = "NA") |
x <- 1:10 fcase( x %% 6 == 0, "fizz buzz", x %% 2 == 0, "fizz", x %% 3 == 0, "buzz", default = NA_character_ ) |
x <- 1:10 result <- ifelse(x %% 6 == 0, "fizz buzz", ifelse(x %% 2 == 0, "fizz", ifelse(x %% 3 == 0, "buzz", as.character(x)))) result |
x <- 1:10 case_when( x %% 6 == 0 ~ "fizz buzz", x %% 2 == 0 ~ "fizz", x %% 3 == 0 ~ "buzz", TRUE ~ as.character(x) ) |
y[x, on = "Id"] merge(x, y, all.x = TRUE, by = "Id") y[x] # requires keys |
merge(x, y, by = "Id", all.x = TRUE) |
left_join(x, y, by = "Id") |
x[y, on = "Id"] merge(x, y, all.y = TRUE, by = "Id") x[y] # requires keys |
merge(x, y, by = "Id", all.y = TRUE) |
right_join(x, y, by = "Id") |
x[y, on = "Id", nomatch = 0] merge(x, y) x[y, nomatch = 0] # requires keys |
merge(x, y, by = "Id") |
inner_join(x, y, by = "Id") |
merge(x, y, all = TRUE, by = "Id") |
merge(x, y, by = "Id", all = TRUE) |
full_join(x, y, by = "Id") |
unique(x[y$Id, on = "Id", nomatch = 0]) unique(x[y$Id, nomatch = 0]) # requires keys |
merge(x, y, by = "Id", all.x = TRUE)[!is.na(Id), ] |
semi_join(x, y, by = "Id") |
x[!y, on = "Id"] x[!y] # requires keys |
merge(x, y, by = "Id", all.x = TRUE, all.y = FALSE)[is.na(y$Id), ] |
anti_join(x, y, by = "Id") |
## i. prefix refers to cols in y x[y, .(Id, X1, i.XY)] # x. prefix refers to cols in x x[y, .(Id, x.XY, i.XY)] |
merge(select(x, Id, X1), select(y, Id, XY), by = "Id", all.x = FALSE, all.y = TRUE) merge(select(x, Id, XY), select(y, Id, XY), by = "Id", all.x = FALSE, all.y = TRUE) |
right_join(select(x, Id, X1), select(y, Id, XY), by = "Id") right_join(select(x, Id, XY), select(y, Id, XY), by = "Id") |
y[x, .(X1Y1 = sum(Y1) * X1), by = .EACHI] |
merge(aggregate(Y1 ~ Id, data = y, FUN = sum), x, by = "Id", all.x = FALSE, all.y = TRUE) %>% transform(X1Y1 = SumY1 * X1)[, c("Id", "X1Y1")] |
y %>% group_by(Id) %>% summarise(SumY1 = sum(Y1)) %>% right_join(x) %>% mutate(X1Y1 = SumY1 * X1) %>% select(Id, X1Y1) |
y[x, SqX1 := i.X1^2] y[, SqX1 := x[.BY, X1^2, on = "Id"], by = Id] y[, SqX1 := NULL] |
merge( transform(x[, c("Id", "X1")], SqX1 = X1^2), y, by = "Id", all.x = FALSE, all.y = TRUE)[, c(names(y), "SqX1")] |
x %>% select(Id, X1) %>% mutate(SqX1 = X1^2) %>% right_join(y, by = "Id") %>% select(names(y), SqX1) |
x[, y := .(.(y[.BY, on = "Id"])), by = Id] x[, y := NULL] |
merge(x, y, by = "Id", all = TRUE) |> transform(Nested = split(., .$Id)) |
nest_join(x, y, by = "Id") |
cols <- c("NewXY", "NewX1") icols <- paste0("i.", c("XY", "X1")) y[x, (cols) := mget(icols)] y[, (cols) := NULL] |
cols <- c("NewXY", "NewX1") icols <- paste0("i.", c("XY", "X1")) y[, cols] <- mget(icols) y[, cols] <- NULL |
# ? |
x[z, on = "X1 == Z1"] x[z, on = .(X1 == Z1)] x[z, on = .(Id == ID, X1 == Z1)] |
merge(x, z, by.x = "X1", by.y = "Z1", all.y = TRUE) merge(x, z, by.x = c("Id", "X1"), by.y = c("ID", "Z1"), all.y = TRUE) |
right_join(x, z, by = c("X1" = "Z1")) right_join(x, z, by = c("Id" = "ID", "X1" = "Z1")) |
x[z, on = .(Id == ID, X1 <= Z1)] x[z, on = .(Id == ID, X1 > Z1)] x[z, on = .(X1 < Z1), allow.cartesian = TRUE] |
subset(merge(x, z, by.x = "Id", by.y = "ID", all.x = TRUE), X1 <= Z1) subset(merge(x, z, by.x = "Id", by.y = "ID", all.x = TRUE), X1 > Z1) merge(x, z, by.x = "Id", by.y = "ID", all.x = TRUE)[X1 < Z1, ] |
# |
x[z, on = .(Id == ID, X1 == Z1), roll = "nearest"] setkey(x, Id, X1) x[.("C", 5:9), roll = "nearest"] |
merge(x, z, by.x = c("Id", "X1"), by.y = c("ID", "Z1"), all.x = TRUE) x[order(x$Id, x$X1)] x[.("C", 5:9), roll = "nearest"] |
# |
x[.("C", 5:9), roll = Inf] x[.("C", 5:9), roll = 0.5] x[.("C", 5:9), roll = Inf, rollends = c(FALSE, TRUE)] x[.("C", 5:9), roll = Inf, rollends = c(FALSE, FALSE)] |
x[x$V1 == "C" & x$V2 %in% 5:9, ] x[x$V1 == "C" & x$V2 >= 5 & x$V2 <= 9, ] x[x$V1 == "C" & x$V2 >= 5 & x$V2 <= 9, ] x[x$V1 == "C" & x$V2 > 5 & x$V2 < 9, ] |
# |
x[.("C", 5:9), roll = -Inf] x[.("C", 5:9), roll = -0.5] x[.("C", 5:9), roll = -Inf, rollends = c(TRUE, FALSE)] x[.("C", 5:9), roll = -Inf, rollends = c(TRUE, TRUE)] |
x[x$V1 == "C" & x$V2 <= 9 & x$V2 >= 5, ] x[x$V1 == "C" & x$V2 >= 5 & x$V2 <= 9, ] x[x$V1 == "C" & x$V2 > 5 & x$V2 <= 9, ] x[x$V1 == "C" & x$V2 >= 5 & x$V2 <= 9, ] |
# |
CJ(c(2, 1, 1), 3:2) CJ(c(2, 1, 1), 3:2, sorted = FALSE, unique = TRUE) |
expand.grid(c(2, 1, 1), 3:2) unique(expand.grid(c(2, 1, 1), 3:2)) |
expand.grid(c(2, 1, 1), 3:2) # |
rbind(x, y) rbind(x, z, fill = TRUE) |
rbind(x, y) rbind(x, z) |
bind_rows(x, y) bind_rows(x, z) |
rbindlist(list(x, y), idcol = TRUE) |
rbind(cbind(x, id = 1), cbind(y, id = 2)) |
bind_rows(list(x, y), .id = "id") |
base::cbind(x, y) |
cbind(x, y) |
bind_cols(x, y) |
fsetdiff(x, y) fsetdiff(x, y, all = TRUE) |
setdiff(x, y) |
dplyr::setdiff(x, y) |
funion(x, y) funion(x, y, all = TRUE) |
union(x, y) c(x, y) |
dplyr::union(x, y) union_all(x, y) |
fsetequal(x, x[order(-V1),]) all.equal(x, x) |
identical(x, x) all.equal(x, x) |
setequal(x, x[order(-V1),]) all_equal(x, x) |