| join.tbl_dt {dtplyr} | R Documentation |
See join for a description of the general purpose of the
functions.
inner_join.data.table(x, y, by = NULL, copy = FALSE, suffix = c(".x",
".y"), ...)
left_join.data.table(x, y, by = NULL, copy = FALSE, suffix = c(".x",
".y"), ...)
right_join.data.table(x, y, by = NULL, copy = FALSE, suffix = c(".x",
".y"), ...)
full_join.data.table(x, y, by = NULL, copy = FALSE, suffix = c(".x",
".y"), ...)
semi_join.data.table(x, y, by = NULL, copy = FALSE, ...)
anti_join.data.table(x, y, by = NULL, copy = FALSE, ...)
x, y |
tbls to join |
by |
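a character vector of variables to join by. If `NULL`, the default, the join is a natural join that uses all variables with common names across the two tables. To join by different variables on x and y use a named vector.
For example, `by = c("a" = "b")` will match `x.a` to `y.b`. |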
copy |
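If `x` and `y` are not from the same data source and `copy` is `TRUE`, then `y` is copied into the same source as `x`. This can be an expensive operation, so you must opt into it. |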
suffix |
If there are non-joined duplicate variables in `x` and `y`, these suffixes are added to the output to disambiguate them. |
... |
Included for compatibility with generic; otherwise ignored. |
# Examples: joining data.table-backed tbls with the dplyr join verbs.
library(dplyr, warn.conflicts = FALSE)

# Lahman only supplies the example data, so it is treated as an optional
# dependency: check that it is installed with requireNamespace() and reference
# its datasets explicitly rather than attaching the package with require().
if (requireNamespace("Lahman", quietly = TRUE)) {
  batting_dt <- tbl_dt(Lahman::Batting)
  person_dt <- tbl_dt(Lahman::Master)

  # Inner join: match batting and person data
  # (`by` is left at its default of NULL in every call below)
  inner_join(batting_dt, person_dt)

  # Left join: keep batting data even if person missing
  left_join(batting_dt, person_dt)

  # Semi-join: find batting data for top 4 teams, 2010:2012
  grid <- expand.grid(
    teamID = c("WAS", "ATL", "PHI", "NYA"),
    yearID = 2010:2012
  )
  # grid is a plain data frame built by expand.grid(), not a tbl_dt, hence
  # copy = TRUE.
  top4 <- semi_join(batting_dt, grid, copy = TRUE)

  # Anti-join: find batting data without player data
  anti_join(batting_dt, person_dt)
}