diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd index 15c7ab0bc..448b15676 100644 --- a/.dev/CRAN_Release.cmd +++ b/.dev/CRAN_Release.cmd @@ -208,15 +208,15 @@ R CMD build . export GITHUB_PAT="f1c.. github personal access token ..7ad" # avoids many too-many-requests in --as-cran's ping-all-URLs step (20 mins) inside the `checking CRAN incoming feasibility...` step. # Many thanks to Dirk for the tipoff that setting this env variable solves the problem, #4832. -R CMD check data.table_1.13.7.tar.gz --as-cran -R CMD INSTALL data.table_1.13.7.tar.gz --html +R CMD check data.table_1.14.1.tar.gz --as-cran +R CMD INSTALL data.table_1.14.1.tar.gz --html # Test C locale doesn't break test suite (#2771) echo LC_ALL=C > ~/.Renviron R Sys.getlocale()=="C" q("no") -R CMD check data.table_1.13.7.tar.gz +R CMD check data.table_1.14.1.tar.gz rm ~/.Renviron # Test non-English does not break test.data.table() due to translation of messages; #3039, #630 @@ -233,9 +233,9 @@ q("no") # User supplied PKG_CFLAGS and PKG_LIBS passed through, #4664 # Next line from https://mac.r-project.org/openmp/. Should see the arguments passed through and then fail with gcc on linux. 
-PKG_CFLAGS='-Xclang -fopenmp' PKG_LIBS=-lomp R CMD INSTALL data.table_1.13.7.tar.gz +PKG_CFLAGS='-Xclang -fopenmp' PKG_LIBS=-lomp R CMD INSTALL data.table_1.14.1.tar.gz # Next line should work on Linux, just using superfluous and duplicate but valid parameters here to see them retained and work -PKG_CFLAGS='-fopenmp' PKG_LIBS=-lz R CMD INSTALL data.table_1.13.7.tar.gz +PKG_CFLAGS='-fopenmp' PKG_LIBS=-lz R CMD INSTALL data.table_1.14.1.tar.gz R remove.packages("xml2") # we checked the URLs; don't need to do it again (many minutes) @@ -266,7 +266,7 @@ alias R310=~/build/R-3.1.0/bin/R ### END ONE TIME BUILD cd ~/GitHub/data.table -R310 CMD INSTALL ./data.table_1.13.7.tar.gz +R310 CMD INSTALL ./data.table_1.14.1.tar.gz R310 require(data.table) test.data.table(script="*.Rraw") @@ -278,7 +278,7 @@ test.data.table(script="*.Rraw") vi ~/.R/Makevars # Make line SHLIB_OPENMP_CFLAGS= active to remove -fopenmp R CMD build . -R CMD INSTALL data.table_1.13.7.tar.gz # ensure that -fopenmp is missing and there are no warnings +R CMD INSTALL data.table_1.14.1.tar.gz # ensure that -fopenmp is missing and there are no warnings R require(data.table) # observe startup message about no OpenMP detected test.data.table() @@ -286,7 +286,7 @@ q("no") vi ~/.R/Makevars # revert change above R CMD build . -R CMD check data.table_1.13.7.tar.gz +R CMD check data.table_1.14.1.tar.gz ##################################################### @@ -336,8 +336,8 @@ alias Rdevel-strict-gcc='~/build/R-devel-strict-gcc/bin/R --vanilla' alias Rdevel-strict-clang='~/build/R-devel-strict-clang/bin/R --vanilla' cd ~/GitHub/data.table -Rdevel-strict-gcc CMD INSTALL data.table_1.13.7.tar.gz -Rdevel-strict-clang CMD INSTALL data.table_1.13.7.tar.gz +Rdevel-strict-gcc CMD INSTALL data.table_1.14.1.tar.gz +Rdevel-strict-clang CMD INSTALL data.table_1.14.1.tar.gz # Check UBSAN and ASAN flags appear in compiler output above. 
Rdevel was compiled with them so should be passed through to here Rdevel-strict-gcc Rdevel-strict-clang # repeat below with clang and gcc @@ -378,7 +378,7 @@ cd R-devel-valgrind make cd ~/GitHub/data.table vi ~/.R/Makevars # make the -O2 -g line active, for info on source lines with any problems -Rdevel-valgrind CMD INSTALL data.table_1.13.7.tar.gz +Rdevel-valgrind CMD INSTALL data.table_1.14.1.tar.gz R_DONT_USE_TK=true Rdevel-valgrind -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --show-leak-kinds=definite,possible --gen-suppressions=all --suppressions=./.dev/valgrind.supp -s" # the default for --show-leak-kinds is 'definite,possible' which we're setting explicitly here as a reminder. CRAN uses the default too. # including 'reachable' (as 'all' does) generates too much output from R itself about by-design permanent blocks @@ -416,7 +416,7 @@ cd ~/build/rchk/trunk . ../scripts/config.inc . ../scripts/cmpconfig.inc vi ~/.R/Makevars # set CFLAGS=-O0 -g so that rchk can provide source line numbers -echo 'install.packages("~/GitHub/data.table/data.table_1.13.7.tar.gz",repos=NULL)' | ./bin/R --slave +echo 'install.packages("~/GitHub/data.table/data.table_1.14.1.tar.gz",repos=NULL)' | ./bin/R --slave # objcopy warnings (if any) can be ignored: https://github.com/kalibera/rchk/issues/17#issuecomment-497312504 . ../scripts/check_package.sh data.table cat packages/lib/data.table/libs/*check @@ -574,7 +574,7 @@ du -k inst/tests # 0.75MB after R CMD build . export GITHUB_PAT="f1c.. 
github personal access token ..7ad" Rdevel -q -e "packageVersion('xml2')" # ensure installed -Rdevel CMD check data.table_1.13.6.tar.gz --as-cran # use latest Rdevel as it may have extra checks +Rdevel CMD check data.table_1.14.0.tar.gz --as-cran # use latest Rdevel as it may have extra checks # bunzip2 inst/tests/*.Rraw.bz2 # decompress *.Rraw again so as not to commit compressed *.Rraw to git # @@ -582,10 +582,7 @@ Resubmit to winbuilder (R-release, R-devel and R-oldrelease) Submit to CRAN. Message template : ------------------------------------------------------------ Hello, -921 CRAN revdeps checked. None are impacted. -valgrind 'additional check' fixed. -Solaris not yet resolved. -POUMM's gcc-ASAN error resolved by this data.table update. +1,016 CRAN revdeps checked. None are impacted. Many thanks! Best, Matt ------------------------------------------------------------ @@ -604,8 +601,8 @@ When CRAN's email contains "Pretest results OK pending a manual inspection" (or 3. Add new heading in NEWS for the next dev version. Add "(submitted to CRAN on )" on the released heading. 4. Bump dllVersion() in init.c 5. Bump 3 version numbers in Makefile -6. Search and replace this .dev/CRAN_Release.cmd to update 1.13.5 to 1.13.7, and 1.13.4 to 1.13.6 (e.g. in step 8 and 9 below) +6. Search and replace this .dev/CRAN_Release.cmd to update 1.13.7 to 1.14.1, and 1.13.6 to 1.14.0 (e.g. in step 8 and 9 below) 7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/) -8. Push to master with this consistent commit message: "1.13.6 on CRAN. Bump to 1.13.7" -9. Take sha from step 8 and run `git tag 1.13.6 96c..sha..d77` then `git push origin 1.13.6` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310) +8. Push to master with this consistent commit message: "1.14.0 on CRAN. Bump to 1.14.1" +9. 
Take sha from step 8 and run `git tag 1.14.0 96c..sha..d77` then `git push origin 1.14.0` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310) ###### diff --git a/DESCRIPTION b/DESCRIPTION index c7820bbb3..78ca52b48 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: data.table -Version: 1.13.7 +Version: 1.14.1 Title: Extension of `data.frame` Authors@R: c( person("Matt","Dowle", role=c("aut","cre"), email="mattjdowle@gmail.com"), diff --git a/Makefile b/Makefile index e1331064d..2be00d3b7 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ some: .PHONY: clean clean: - $(RM) data.table_1.13.7.tar.gz + $(RM) data.table_1.14.1.tar.gz $(RM) src/*.o $(RM) src/*.so @@ -28,7 +28,7 @@ build: .PHONY: install install: - $(R) CMD INSTALL data.table_1.13.7.tar.gz + $(R) CMD INSTALL data.table_1.14.1.tar.gz .PHONY: uninstall uninstall: @@ -40,7 +40,7 @@ test: .PHONY: check check: - _R_CHECK_CRAN_INCOMING_REMOTE_=false $(R) CMD check data.table_1.13.7.tar.gz --as-cran --ignore-vignettes --no-stop-on-test-error + _R_CHECK_CRAN_INCOMING_REMOTE_=false $(R) CMD check data.table_1.14.1.tar.gz --as-cran --ignore-vignettes --no-stop-on-test-error .PHONY: revision revision: diff --git a/NEWS.md b/NEWS.md index 77f7bf31f..a51de94eb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,13 +2,22 @@ **Benchmarks are regularly updated: [here](https://h2oai.github.io/db-benchmark/)** -# data.table [v1.13.7](https://github.com/Rdatatable/data.table/milestone/20) (in development) +# data.table [v1.14.1](https://github.com/Rdatatable/data.table/milestone/20) (in development) + +## NEW FEATURES + +## BUG FIXES + +## NOTES + + +# data.table [v1.14.0](https://github.com/Rdatatable/data.table/milestone/23?closed=1) (submitted to CRAN on 20 Feb 2021) ## POTENTIALLY BREAKING CHANGES 1. In v1.13.0 (July 2020) native parsing of datetime was added to `fread` by Michael Chirico which dramatically improved performance. 
Before then datetime was read as type character by default which was slow. Since v1.13.0, UTC-marked datetime (e.g. `2020-07-24T10:11:12.134Z` where the final `Z` is present) has been read automatically as POSIXct and quickly. We provided the migration option `datatable.old.fread.datetime.character` to revert to the previous slow character behavior. We also added the `tz=` argument to control unmarked datetime; i.e. where the `Z` (or equivalent UTC postfix) is missing in the data. The default `tz=""` reads unmarked datetime as character as before, slowly. We gave you the ability to set `tz="UTC"` to turn on the new behavior and read unmarked datetime as UTC, quickly. R sessions that are running in UTC by setting the TZ environment variable, as is good practice and common in production, have also been reading unmarked datetime as UTC since v1.13.0, much faster. Note 1 of v1.13.0 (below in this file) ended `In addition to convenience, fread is now significantly faster in the presence of dates, UTC-marked datetimes, and unmarked datetime when tz="UTC" is provided.`. - At `rstudio::global(2021)`, Neal Richardson, Director of Engineering at Ursa Labs, compared Arrow CSV performance to `data.table` CSV performance, [Bigger Data With Ease Using Apache Arrow](https://twitter.com/enpiar/status/1357729619420475392). He opened by comparing to `data.table` as his main point. Arrow was presented as 3 times faster than `data.table`. He talked at length about this result. However, no reproducible code was provided and we were not contacted in advance in case we had any comments. He mentioned New York Taxi data in his talk which is a dataset known to us as containing unmarked datetime. [Rebuttal](https://twitter.com/MattDowle/status/1360073970498875394). 
+ At `rstudio::global(2021)`, Neal Richardson, Director of Engineering at Ursa Labs, compared Arrow CSV performance to `data.table` CSV performance, [Bigger Data With Ease Using Apache Arrow](https://rstudio.com/resources/rstudioglobal-2021/bigger-data-with-ease-using-apache-arrow/). He opened by comparing to `data.table` as his main point. Arrow was presented as 3 times faster than `data.table`. He talked at length about this result. However, no reproducible code was provided and we were not contacted in advance in case we had any comments. He mentioned New York Taxi data in his talk which is a dataset known to us as containing unmarked datetime. [Rebuttal](https://twitter.com/MattDowle/status/1360073970498875394). `tz=`'s default is now changed from `""` to `"UTC"`. If you have been using `tz=` explicitly then there should be no change. The change to read UTC-marked datetime as POSIXct rather than character already happened in v1.13.0. The change now is that unmarked datetimes are now read as UTC too by default without needing to set `tz="UTC"`. None of the 1,017 CRAN packages directly using `data.table` are affected. As before, the migration option `datatable.old.fread.datetime.character` can still be set to TRUE to revert to the old character behavior. This migration option is temporary and will be removed in the near future. @@ -999,7 +1008,7 @@ has a better chance of working on Mac. ## NOTES -1. The type coercion warning message has been improved, [#2989](https://github.com/Rdatatable/data.table/pull/2989). Thanks to @sarahbeeysian on [Twitter](https://twitter.com/sarahbeeysian/status/1021359529789775872) for highlighting. For example, given the follow statements: +1. The type coercion warning message has been improved, [#2989](https://github.com/Rdatatable/data.table/pull/2989). Thanks to @sarahbeeysian on Twitter for highlighting. 
For example, given the following statements: ```R DT = data.table(id=1:3) diff --git a/src/init.c b/src/init.c index 6f3edec64..714608c40 100644 --- a/src/init.c +++ b/src/init.c @@ -414,6 +414,6 @@ SEXP initLastUpdated(SEXP var) { SEXP dllVersion() { // .onLoad calls this and checks the same as packageVersion() to ensure no R/C version mismatch, #3056 - return(ScalarString(mkChar("1.13.7"))); + return(ScalarString(mkChar("1.14.1"))); } diff --git a/vignettes/datatable-intro.Rmd b/vignettes/datatable-intro.Rmd index ddbb59024..1dcfe786f 100644 --- a/vignettes/datatable-intro.Rmd +++ b/vignettes/datatable-intro.Rmd @@ -38,7 +38,7 @@ Briefly, if you are interested in reducing *programming* and *compute* time trem ## Data {#data} -In this vignette, we will use [NYC-flights14](https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv) data obtained by [flights](https://github.com/arunsrinivasan/flights) package (available on GitHub only). It contains On-Time flights data from the [Bureau of Transporation Statistics](https://www.transtats.bts.gov) for all the flights that departed from New York City airports in 2014 (inspired by [nycflights13](https://github.com/hadley/nycflights13)). The data is available only for Jan-Oct'14. +In this vignette, we will use [NYC-flights14](https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv) data obtained by [flights](https://github.com/arunsrinivasan/flights) package (available on GitHub only). It contains On-Time flights data from the Bureau of Transportation Statistics for all the flights that departed from New York City airports in 2014 (inspired by [nycflights13](https://github.com/hadley/nycflights13)). The data is available only for Jan-Oct'14. We can use `data.table`'s fast-and-friendly file reader `fread` to load `flights` directly as follows: