Commit 9474c4e

fix NCBI urls from http to https, bump dev version, fix #95
sckott committed Apr 25, 2017
1 parent b5568c8 commit 9474c4e
Showing 5 changed files with 11 additions and 44 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -5,7 +5,7 @@ Description: Species trait data from many different sources, including
data from the Global Invasive Species Database and 'EOL', 'Traitbank' data
from 'EOL', Coral traits data from http://coraltraits.org, 'nativity' status
('Flora Europaea' or 'ITIS'), and 'Birdlife' International.
-Version: 0.2.0.9411
+Version: 0.2.0.9415
Authors@R: c(
person("Scott", "Chamberlain", role = c("aut", "cre"), email = "myrmecocystus@gmail.com"),
person("Zachary", "Foster", role = "aut", email = "zacharyfoster1989@gmail.com"),
35 changes: 1 addition & 34 deletions R/ncbi_byid.R
@@ -38,7 +38,7 @@ ncbi_byid <- function(ids, format=NULL, verbose=TRUE) {

x <- paste(ids, collapse = ",")
mssg(verbose, "Retrieving sequence IDs...")
tt <- GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
tt <- GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
query = list(db = "sequences", id = x, retmode = "xml"))
stop_for_status(tt)
mssg(verbose, "Parsing...")
@@ -58,36 +58,3 @@ ncbi_byid <- function(ids, format=NULL, verbose=TRUE) {
mssg(verbose, "...done")
data.frame(rbindlist(tmp))
}

-# not_spp <- c("mitochondrial", "voucher", "^ATCC$", "^DNA$", "sequence",
-# "^satellite$", "^mRNA$", "^unnamed protein product$", "^gene$")
-
-# ids <- paste(ids, collapse = ",")
-# queryseq <- list(db = "sequences", id = ids, rettype = format, retmode = "text")
-# tt <- GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi", query = queryseq)
-# stop_for_status(tt)
-# outseq <- content(tt, "text", encoding = "UTF-8")
-#
-# outseq2 <- strsplit(outseq, '>')[[1]][-1]
-
-# foo <- function(x){
-# temp <- paste(">", x, sep = "")
-# seq <- gsub("\n", "", strsplit(sub("\n", "<<<", temp[[1]]), "<<<")[[1]][[2]])
-# idaccess <- strsplit(x, "\\|")[[1]][c(2,4)]
-# desc <- strsplit(strsplit(x, "\\|")[[1]][[5]], "\n")[[1]][[1]]
-# outt <- list(desc, as.character(idaccess[1]), idaccess[2], nchar(seq), seq)
-#
-# fifth <- strsplit(temp, "\\|")[[1]][[5]]
-# if (grepl("\\[.+\\]", fifth)) {
-# spused <- gsub("\\[|\\]", "", strextract(fifth, "\\[.+\\]"))
-# } else {
-# spused <-
-# strsplit(gsub("^\\s+|\\s+$", "", fifth, "both"), " ")[[1]][1:3]
-# spused <-
-# grep(paste0(not_spp, collapse = "|"), spused, invert = TRUE, value = TRUE)
-# spused <- paste(spused, sep = "", collapse = " ")
-# }
-#
-# setNames(data.frame(spused = spused, outt, stringsAsFactors = FALSE),
-# c("taxon","gene_desc","gi_no","acc_no","length","sequence"))
-# }
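For context, a minimal standalone sketch of the updated https efetch request, mirroring the call kept near the top of this file's diff; the IDs here are hypothetical placeholders (assumes the httr package is installed):

library(httr)

ids <- c("123456789", "987654321")   # hypothetical GenBank sequence IDs
tt <- GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
          query = list(db = "sequences", id = paste(ids, collapse = ","),
                       retmode = "xml"))
stop_for_status(tt)                              # fail loudly on HTTP errors
raw <- content(tt, "text", encoding = "UTF-8")   # XML text, parsed further by ncbi_byid()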
10 changes: 5 additions & 5 deletions R/ncbi_byname.R
@@ -39,7 +39,7 @@ ncbi_byname <- function(taxa, gene="COI", seqrange="1:3000", getrelated=FALSE,
RetMax = 500)

out <-
-xml2::xml_find_all(xml2::read_xml(content(GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
+xml2::xml_find_all(xml2::read_xml(content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
query = query), "text", encoding = "UTF-8")), "//eSearchResult")[[1]]
if (as.numeric(xml2::xml_text(xml2::xml_find_all(out, "//Count")[[1]])) == 0) {
message(paste("no sequences of ", gene, " for ", xx, " - getting other sp.", sep = ""))
@@ -54,7 +54,7 @@ ncbi_byname <- function(taxa, gene="COI", seqrange="1:3000", getrelated=FALSE,
newname <- strsplit(xx, " ")[[1]][[1]]
query <- list(db = "nuccore", term = paste(newname, "[Organism] AND", genes_, "AND", seqrange, "[SLEN]", collapse = " "), RetMax = 500)
out <-
-xml2::xml_find_all(xml2::read_xml(content(GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi", query = query),
+xml2::xml_find_all(xml2::read_xml(content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi", query = query),
"text", encoding = "UTF-8")), "//eSearchResult")[[1]]
if (as.numeric(xml2::xml_text(xml2::xml_find_all(out, "//Count")[[1]])) == 0) {
mssg(verbose, paste("no sequences of ", gene, " for ", xx, " or ", newname, sep = ""))
@@ -68,7 +68,7 @@ ncbi_byname <- function(taxa, gene="COI", seqrange="1:3000", getrelated=FALSE,
querysum <- list(db = "nucleotide", id = paste(make_ids(out), collapse = " ")) # construct query for species
res <- parse_ncbi(xx,
xml2::xml_find_all(
xml2::read_xml(content(GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
xml2::read_xml(content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
query = querysum), "text", encoding = "UTF-8")), "//eSummaryResult"), verbose)
}
}
@@ -77,7 +77,7 @@ ncbi_byname <- function(taxa, gene="COI", seqrange="1:3000", getrelated=FALSE,
mssg(verbose, "...retrieving sequence ID with longest sequence length...")
querysum <- list(db = "nucleotide", id = paste(make_ids(out), collapse = " ")) # construct query for species
res <- parse_ncbi(xx, xml2::xml_find_all(xml2::read_xml(content( # API call
GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
query = querysum), "text", encoding = "UTF-8")), "//eSummaryResult")[[1]], verbose)
}

@@ -105,7 +105,7 @@ parse_ncbi <- function(xx, z, verbose){
## Get sequence from previous
mssg(verbose, "...retrieving sequence...")
queryseq <- list(db = "sequences", id = gisuse[,1], rettype = "fasta", retmode = "text")
outseq <- content(GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi", query = queryseq), "text", encoding = "UTF-8")
outseq <- content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi", query = queryseq), "text", encoding = "UTF-8")
seq <- gsub("\n", "", strsplit(sub("\n", "<<<", outseq), "<<<")[[1]][[2]])
accessnum <- strsplit(outseq, "\\|")[[1]][4]
outt <- list(xx, as.character(gisuse[,3]), gisuse[,1], accessnum, gisuse[,2], seq)
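Likewise, a minimal sketch of the https esearch-then-esummary flow this file uses; the organism and gene terms are illustrative, not the exact query strings the package builds (assumes httr and xml2 are installed):

library(httr)
library(xml2)

# 1. esearch over https: look up nucleotide IDs
#    (illustrative term; 1:3000[SLEN] bounds sequence length, as in the default seqrange)
qry <- list(db = "nuccore",
            term = "Apis mellifera[Organism] AND COI[Gene Name] AND 1:3000[SLEN]",
            RetMax = 500)
found <- read_xml(content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
                              query = qry), "text", encoding = "UTF-8"))
ids <- xml_text(xml_find_all(found, "//IdList/Id"))

# 2. esummary over https: summaries (caption, length, ...) for those IDs
summ <- read_xml(content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
                             query = list(db = "nucleotide",
                                          id = paste(ids, collapse = ","))),
                         "text", encoding = "UTF-8"))
xml_text(xml_find_all(summ, "//Item[@Name='Length']"))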
6 changes: 3 additions & 3 deletions R/ncbi_searcher.R
@@ -10,7 +10,7 @@
#' @param entrez_query (\code{character}; length 1) An Entrez-format query to filter results with.
#' This is useful to search for sequences with specific characteristics. The format is the same
#' as the one used to search GenBank.
-#' (\url{http://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options})
+#' (\url{https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options})
#' @param fuzzy (logical) Whether to do fuzzy taxonomic ID search or exact search. If \code{TRUE},
#' we use \code{xXarbitraryXx[porgn:__txid<ID>]}, but if \code{FALSE}, we use \code{txid<ID>}.
#' Default: \code{FALSE}
@@ -91,8 +91,8 @@ ncbi_searcher <- function(taxa = NULL, id = NULL, seqrange="1:3000", getrelated=
}

# Constants --------------------------------------------------------------------------------------
url_esearch <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
url_esummary <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
url_esearch <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
url_esummary <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

# Function to process queries one at a time ------------------------------------------------------
ncbi_searcher_foo <- function(xx, getrelated, verbose, seqrange, entrez_query, fuzzy, limit, hypothetical, ...) {
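And a small usage sketch for the constants above: an exact-ID search of the txid<ID> form mentioned in the fuzzy docs, combined with an Entrez-format filter of the kind the entrez_query help links to; the taxon ID, the [Organism] field tag, and the length filter are examples, not package defaults:

library(httr)
library(xml2)

url_esearch <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

# exact taxon-ID search (txid9606 = Homo sapiens) plus a sequence-length filter
term <- "txid9606[Organism] AND 1:3000[SLEN]"
res  <- GET(url_esearch, query = list(db = "nuccore", term = term, RetMax = 100))
xml_text(xml_find_all(read_xml(content(res, "text", encoding = "UTF-8")),
                      "//IdList/Id"))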
2 changes: 1 addition & 1 deletion man/ncbi_searcher.Rd

This generated file is not rendered by default.
