Skip to content

Commit

Permalink
Closes #477. Join retains keys properly on factor types.
Browse files Browse the repository at this point in the history
  • Loading branch information
arunsrinivasan committed Jul 18, 2015
1 parent 425d554 commit 7f78b1b
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 5 deletions.
11 changes: 8 additions & 3 deletions R/bmerge.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,15 @@ bmerge <- function(i, x, leftcols, rightcols, io, xo, roll, rollends, nomatch, v
.Call(Cbmerge, i, x, as.integer(leftcols), as.integer(rightcols), io<-haskey(i), xo, roll, rollends, nomatch, f__, len__, allLen1)
# NB: io<-haskey(i) is necessary for test 579, where the := above changes the factor to character and removes i's key
if (verbose) {cat("done in",round(proc.time()[3]-last.started.at,3),"secs\n");flush.console}
for (ii in resetifactor) set(i,j=ii,value=origi[[ii]]) # in the caller's shallow copy, see comment at the top of this function for usage

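# Restore the columns of i listed in resetifactor to their original values (this modifies the caller's shallow copy; see comment at the top of this function for usage).
# Coercions made to the other columns of i are deliberately left in place, since the caller depends on them to build the result.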

if (length(resetifactor)) {
for (ii in resetifactor)
set(i,j=ii,value=origi[[ii]])
if (haskey(origi))
setattr(i, 'sorted', key(origi))
}
return(list(starts=f__, lens=len__, allLen1=allLen1))
}

Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@

55. `setDT` handles `key=` argument properly when input is already a `data.table`. Closes [#1169](https://github.com/Rdatatable/data.table/issues/1169). Thanks to @DavidArenburg for the PR.

56. Key is retained properly when joining on factor type columns. Closes [#477](https://github.com/Rdatatable/data.table/issues/477). Thanks to @nachti for the report.

#### NOTES

1. Clearer explanation of what `duplicated()` does (borrowed from base). Thanks to @matthieugomez for pointing this out. Closes [#872](https://github.com/Rdatatable/data.table/issues/872).
Expand Down
15 changes: 13 additions & 2 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -2557,7 +2557,7 @@ test(916, DT[,newcol:=logical(0),by=a], data.table(a=1:3,b=1:6,newcol=NA))
# roll join error when non last join column is factor, #2450
X = data.table(id=2001:2004, uid=c(1001,1002,1001,1001), state=factor(c('CA','CA','CA','MA')), ts=c(51,52,53,54), key='state,uid,ts')
Y = data.table(id=3001:3004, uid=c(1001,1003,1002,1001), state=factor(c('CA','CA','CA','CA')), ts=c(51,57,59,59), key='state,uid,ts')
test(917, X[Y,roll=TRUE], data.table(id=INT(2001,2003,2002,NA), uid=c(1001,1001,1002,1003), state=factor('CA'), ts=c(51,59,59,57), i.id=INT(3001,3004,3003,3002)))
test(917, X[Y,roll=TRUE], data.table(id=INT(2001,2003,2002,NA), uid=c(1001,1001,1002,1003), state=factor('CA'), ts=c(51,59,59,57), i.id=INT(3001,3004,3003,3002), key='state,uid,ts'))

# NA in join column of type double, #2453.
X = data.table(name=c("Joh","Raf","Jon","Ste","Rob","Smi"),depID=c(NA,31,33,33,34,34),key="depID")
Expand Down Expand Up @@ -6070,7 +6070,7 @@ test(1482.3, truelength(ee$DT) >= 100L, TRUE) # truelength restored?
# Fix for #499 and #945
x <- data.table(k=as.factor(c(NA,1,2)),v=c(0,1,2), key="k")
y <- data.table(k=as.factor(c(NA,1,3)),v=c(0,1,3), key="k")
test(1483.1, x[y], data.table(k=factor(c(NA,1,3)), v=c(0,1,NA), i.v=c(0,1,3)))
test(1483.1, x[y], data.table(k=factor(c(NA,1,3)), v=c(0,1,NA), i.v=c(0,1,3), key="k"))
test(1483.2, merge(x,y,all=TRUE), data.table(k=factor(c(NA,1,2,3)), v.x=c(0,1,2,NA), v.y=c(0,1,NA,3), key="k"))

x <- data.table(country="US")
Expand Down Expand Up @@ -6638,6 +6638,17 @@ test(1540.32, DT1[DT3, lapply(.SD, function(x) x * mul),

# to do: add tests for :=

# Regression test for #477: a join on factor key columns must return a result that is still keyed.
# set.seed() pins the rnorm()/sample() draws below so the fixture is reproducible.
set.seed(1)
dtp <- data.table(
  pid  = gl(3, 3, labels = c("du", "i", "nouana")),
  year = gl(3, 1, 9, labels = c("2007", "2010", "2012")),
  val  = rnorm(9),
  key  = c("pid", "year")
)
# dtab$year carries the level "2000", which is not among dtp$year's levels.
dtab <- data.table(
  pid  = factor(c("i", "nouana")),
  year = factor(c("2010", "2000")),
  abn  = sample(1:5, 2, replace = TRUE),
  key  = c("pid", "year")
)
test(1541, key(dtp[dtab]), c("pid", "year"))

##########################


Expand Down

0 comments on commit 7f78b1b

Please sign in to comment.