@article{Collin_2021,
author = {Collin, François-David and Durif, Ghislain and Raynal, Louis and Lombaert, Eric and Gautier, Mathieu and Vitalis, Renaud and Marin, Jean-Michel and Estoup, Arnaud},
title = {Extending approximate Bayesian computation with supervised machine learning to infer demographic history from genetic polymorphisms using DIYABC Random Forest},
journal = {Molecular Ecology Resources},
volume = {21},
number = {8},
pages = {2598--2613},
keywords = {approximate Bayesian computation, demographic history, model or scenario selection, parameter estimation, pool-sequencing, population genetics, random forest, SNP, supervised machine learning},
doi = {10.1111/1755-0998.13413},
url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/1755-0998.13413},
eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1111/1755-0998.13413},
abstract = {Simulation-based methods such as approximate Bayesian computation (ABC) are well-adapted to the analysis of complex scenarios of populations and species genetic history. In this context, supervised machine learning (SML) methods provide attractive statistical solutions to conduct efficient inferences about scenario choice and parameter estimation. The Random Forest methodology (RF) is a powerful ensemble of SML algorithms used for classification or regression problems. Random Forest allows conducting inferences at a low computational cost, without preliminary selection of the relevant components of the ABC summary statistics, and bypassing the derivation of ABC tolerance levels. We have implemented a set of RF algorithms to process inferences using simulated data sets generated from an extended version of the population genetic simulator implemented in DIYABC v2.1.0. The resulting computer package, named DIYABC Random Forest v1.0, integrates two functionalities into a user-friendly interface: the simulation under custom evolutionary scenarios of different types of molecular data (microsatellites, DNA sequences or SNPs) and RF treatments including statistical tools to evaluate the power and accuracy of inferences. We illustrate the functionalities of DIYABC Random Forest v1.0 for both scenario choice and parameter estimation through the analysis of pseudo-observed and real data sets corresponding to pool-sequencing and individual-sequencing SNP data sets. Because of the properties inherent to the implemented RF methods and the large feature vector (including various summary statistics and their linear combinations) available for SNP data, DIYABC Random Forest v1.0 can efficiently contribute to the analysis of large SNP data sets to make inferences about complex population genetic histories.},
year = {2021}
}
@article{pudlo2015reliable,
title = {Reliable ABC model choice via random forests},
author = {Pudlo, Pierre and Marin, Jean-Michel and Estoup, Arnaud and Cornuet, Jean-Marie and Gautier, Mathieu and Robert, Christian P},
journal = {Bioinformatics},
volume = {32},
number = {6},
pages = {859--866},
year = {2016},
publisher = {Oxford University Press}
}
@article{raynal2016abc,
author = {Raynal, Louis and Marin, Jean-Michel and Pudlo, Pierre and Ribatet, Mathieu and Robert, Christian P and Estoup, Arnaud},
title = {{ABC random forests for Bayesian parameter inference}},
journal = {Bioinformatics},
volume = {35},
number = {10},
pages = {1720--1728},
year = {2019},
month = {05},
abstract = {{Approximate Bayesian computation (ABC) has grown into a standard methodology that manages Bayesian inference for models associated with intractable likelihood functions. Most ABC implementations require the preliminary selection of a vector of informative statistics summarizing raw data. Furthermore, in almost all existing implementations, the tolerance level that separates acceptance from rejection of simulated parameter values needs to be calibrated. We propose to conduct likelihood-free Bayesian inferences about parameters with no prior selection of the relevant components of the summary statistics and bypassing the derivation of the associated tolerance level. The approach relies on the random forest (RF) methodology of Breiman (2001) applied in a (non-parametric) regression setting. We advocate the derivation of a new RF for each component of the parameter vector of interest. When compared with earlier ABC solutions, this method offers significant gains in terms of robustness to the choice of the summary statistics, does not depend on any type of tolerance level, and is a good trade-off in terms of quality of point estimator precision and credible interval estimations for a given computing time. We illustrate the performance of our methodological proposal and compare it with earlier ABC methods on a Normal toy example and a population genetics example dealing with human population evolution. All methods designed here have been incorporated in the R package abcrf (version 1.7.1) available on CRAN. Supplementary data are available at Bioinformatics online.}},
issn = {1367-4803},
doi = {10.1093/bioinformatics/bty867},
url = {https://doi.org/10.1093/bioinformatics/bty867},
eprint = {https://academic.oup.com/bioinformatics/article-pdf/35/10/1720/28639964/bty867.pdf}
}
@article{wright2015ranger,
title = {Ranger: a fast implementation of random forests for high dimensional data in C++ and R},
author = {Wright, Marvin N and Ziegler, Andreas},
journal = {arXiv preprint arXiv:1508.04409},
year = {2015}
}
@misc{eigenweb,
author = {Ga\"{e}l Guennebaud and Beno\^{i}t Jacob and others},
title = {Eigen v3},
howpublished = {\url{http://eigen.tuxfamily.org}},
year = {2010}
}
@book{friedman2001elements,
title = {The Elements of Statistical Learning},
author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
series = {Springer Series in Statistics},
year = {2001},
publisher = {Springer},
address = {New York, NY, USA}
}
@inproceedings{lakshminarayanan2014mondrian,
title = {Mondrian forests: Efficient online random forests},
author = {Lakshminarayanan, Balaji and Roy, Daniel M and Teh, Yee Whye},
booktitle = {Advances in Neural Information Processing Systems},
pages = {3140--3148},
year = {2014}
}
@inproceedings{collin:hal-02910067,
title = {{Bringing ABC inference to the machine learning realm: AbcRanger, an optimized random forests library for ABC}},
author = {Collin, Fran{\c c}ois-David and Estoup, Arnaud and Marin, Jean-Michel and Raynal, Louis},
url = {https://hal.archives-ouvertes.fr/hal-02910067},
note = {Virtual conference},
booktitle = {{JOBIM 2020}},
address = {Montpellier, France},
series = {JOBIM},
volume = {2020},
pages = {66},
year = {2020},
month = Jun,
keywords = {Approximate Bayesian Computation, Random Forests, Model Choice, Parameter Estimation, C++, Python, R},
pdf = {https://hal.archives-ouvertes.fr/hal-02910067/file/jobim_proceedings.pdf},
hal_id = {hal-02910067},
hal_version = {v2}
}
@article{JMLR:v19:17-374,
author = {Jarno Lintusaari and Henri Vuollekoski and Antti Kangasr{\"a}{\"a}si{\"o} and Kusti Skyt{\'e}n and Marko J{\"a}rvenp{\"a}{\"a} and Pekka Marttinen and Michael U. Gutmann and Aki Vehtari and Jukka Corander and Samuel Kaski},
title = {ELFI: Engine for Likelihood-Free Inference},
journal = {Journal of Machine Learning Research},
year = {2018},
volume = {19},
number = {16},
pages = {1--7},
url = {http://jmlr.org/papers/v19/17-374.html}
}