Commit 9474c4e

fix NCBI urls from http to https, bump dev version, fix #95
sckott committed Apr 25, 2017
1 parent b5568c8 commit 9474c4e
Showing 5 changed files with 11 additions and 44 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -5,7 +5,7 @@ Description: Species trait data from many different sources, including
data from the Global Invasive Species Database and 'EOL', 'Traitbank' data
from 'EOL', Coral traits data from http://coraltraits.org, 'nativity' status
('Flora Europaea' or 'ITIS'), and 'Birdlife' International.
-Version: 0.2.0.9411
+Version: 0.2.0.9415
Authors@R: c(
person("Scott", "Chamberlain", role = c("aut", "cre"), email = "myrmecocystus@gmail.com"),
person("Zachary", "Foster", role = "aut", email = "zacharyfoster1989@gmail.com"),
35 changes: 1 addition & 34 deletions R/ncbi_byid.R
@@ -38,7 +38,7 @@ ncbi_byid <- function(ids, format=NULL, verbose=TRUE) {

x <- paste(ids, collapse = ",")
mssg(verbose, "Retrieving sequence IDs...")
tt <- GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
tt <- GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
query = list(db = "sequences", id = x, retmode = "xml"))
stop_for_status(tt)
mssg(verbose, "Parsing...")
@@ -58,36 +58,3 @@ ncbi_byid <- function(ids, format=NULL, verbose=TRUE) {
mssg(verbose, "...done")
data.frame(rbindlist(tmp))
}

-# not_spp <- c("mitochondrial", "voucher", "^ATCC$", "^DNA$", "sequence",
-# "^satellite$", "^mRNA$", "^unnamed protein product$", "^gene$")
-
-# ids <- paste(ids, collapse = ",")
-# queryseq <- list(db = "sequences", id = ids, rettype = format, retmode = "text")
-# tt <- GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi", query = queryseq)
-# stop_for_status(tt)
-# outseq <- content(tt, "text", encoding = "UTF-8")
-#
-# outseq2 <- strsplit(outseq, '>')[[1]][-1]
-
-# foo <- function(x){
-# temp <- paste(">", x, sep = "")
-# seq <- gsub("\n", "", strsplit(sub("\n", "<<<", temp[[1]]), "<<<")[[1]][[2]])
-# idaccess <- strsplit(x, "\\|")[[1]][c(2,4)]
-# desc <- strsplit(strsplit(x, "\\|")[[1]][[5]], "\n")[[1]][[1]]
-# outt <- list(desc, as.character(idaccess[1]), idaccess[2], nchar(seq), seq)
-#
-# fifth <- strsplit(temp, "\\|")[[1]][[5]]
-# if (grepl("\\[.+\\]", fifth)) {
-# spused <- gsub("\\[|\\]", "", strextract(fifth, "\\[.+\\]"))
-# } else {
-# spused <-
-# strsplit(gsub("^\\s+|\\s+$", "", fifth, "both"), " ")[[1]][1:3]
-# spused <-
-# grep(paste0(not_spp, collapse = "|"), spused, invert = TRUE, value = TRUE)
-# spused <- paste(spused, sep = "", collapse = " ")
-# }
-#
-# setNames(data.frame(spused = spused, outt, stringsAsFactors = FALSE),
-# c("taxon","gene_desc","gi_no","acc_no","length","sequence"))
-# }
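For context, a minimal standalone sketch of the updated https efetch request, mirroring the call kept near the top of this file's diff; the IDs here are hypothetical placeholders (assumes the httr package is installed):

library(httr)

ids <- c("123456789", "987654321")   # hypothetical GenBank sequence IDs
tt <- GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
          query = list(db = "sequences", id = paste(ids, collapse = ","),
                       retmode = "xml"))
stop_for_status(tt)                              # fail loudly on HTTP errors
raw <- content(tt, "text", encoding = "UTF-8")   # XML text, parsed further by ncbi_byid()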
10 changes: 5 additions & 5 deletions R/ncbi_byname.R
@@ -39,7 +39,7 @@ ncbi_byname <- function(taxa, gene="COI", seqrange="1:3000", getrelated=FALSE,
RetMax = 500)

out <-
-xml2::xml_find_all(xml2::read_xml(content(GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
+xml2::xml_find_all(xml2::read_xml(content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
query = query), "text", encoding = "UTF-8")), "//eSearchResult")[[1]]
if (as.numeric(xml2::xml_text(xml2::xml_find_all(out, "//Count")[[1]])) == 0) {
message(paste("no sequences of ", gene, " for ", xx, " - getting other sp.", sep = ""))
@@ -54,7 +54,7 @@ ncbi_byname <- function(taxa, gene="COI", seqrange="1:3000", getrelated=FALSE,
newname <- strsplit(xx, " ")[[1]][[1]]
query <- list(db = "nuccore", term = paste(newname, "[Organism] AND", genes_, "AND", seqrange, "[SLEN]", collapse = " "), RetMax = 500)
out <-
-xml2::xml_find_all(xml2::read_xml(content(GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi", query = query),
+xml2::xml_find_all(xml2::read_xml(content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi", query = query),
"text", encoding = "UTF-8")), "//eSearchResult")[[1]]
if (as.numeric(xml2::xml_text(xml2::xml_find_all(out, "//Count")[[1]])) == 0) {
mssg(verbose, paste("no sequences of ", gene, " for ", xx, " or ", newname, sep = ""))
@@ -68,7 +68,7 @@ ncbi_byname <- function(taxa, gene="COI", seqrange="1:3000", getrelated=FALSE,
querysum <- list(db = "nucleotide", id = paste(make_ids(out), collapse = " ")) # construct query for species
res <- parse_ncbi(xx,
xml2::xml_find_all(
xml2::read_xml(content(GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
xml2::read_xml(content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
query = querysum), "text", encoding = "UTF-8")), "//eSummaryResult"), verbose)
}
}
@@ -77,7 +77,7 @@ ncbi_byname <- function(taxa, gene="COI", seqrange="1:3000", getrelated=FALSE,
mssg(verbose, "...retrieving sequence ID with longest sequence length...")
querysum <- list(db = "nucleotide", id = paste(make_ids(out), collapse = " ")) # construct query for species
res <- parse_ncbi(xx, xml2::xml_find_all(xml2::read_xml(content( # API call
GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
query = querysum), "text", encoding = "UTF-8")), "//eSummaryResult")[[1]], verbose)
}

@@ -105,7 +105,7 @@ parse_ncbi <- function(xx, z, verbose){
## Get sequence from previous
mssg(verbose, "...retrieving sequence...")
queryseq <- list(db = "sequences", id = gisuse[,1], rettype = "fasta", retmode = "text")
outseq <- content(GET("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi", query = queryseq), "text", encoding = "UTF-8")
outseq <- content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi", query = queryseq), "text", encoding = "UTF-8")
seq <- gsub("\n", "", strsplit(sub("\n", "<<<", outseq), "<<<")[[1]][[2]])
accessnum <- strsplit(outseq, "\\|")[[1]][4]
outt <- list(xx, as.character(gisuse[,3]), gisuse[,1], accessnum, gisuse[,2], seq)
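Likewise, a minimal sketch of the https esearch-then-esummary flow this file uses; the organism and gene terms are illustrative, not the exact query strings the package builds (assumes httr and xml2 are installed):

library(httr)
library(xml2)

# 1. esearch over https: look up nucleotide IDs
#    (illustrative term; 1:3000[SLEN] bounds sequence length, as in the default seqrange)
qry <- list(db = "nuccore",
            term = "Apis mellifera[Organism] AND COI[Gene Name] AND 1:3000[SLEN]",
            RetMax = 500)
found <- read_xml(content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
                              query = qry), "text", encoding = "UTF-8"))
ids <- xml_text(xml_find_all(found, "//IdList/Id"))

# 2. esummary over https: summaries (caption, length, ...) for those IDs
summ <- read_xml(content(GET("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi",
                             query = list(db = "nucleotide",
                                          id = paste(ids, collapse = ","))),
                         "text", encoding = "UTF-8"))
xml_text(xml_find_all(summ, "//Item[@Name='Length']"))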
6 changes: 3 additions & 3 deletions R/ncbi_searcher.R
@@ -10,7 +10,7 @@
#' @param entrez_query (\code{character}; length 1) An Entrez-format query to filter results with.
#' This is useful to search for sequences with specific characteristics. The format is the same
#' as the one used to search GenBank.
-#' (\url{http://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options})
+#' (\url{https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options})
#' @param fuzzy (logical) Whether to do fuzzy taxonomic ID search or exact search. If \code{TRUE},
#' we use \code{xXarbitraryXx[porgn:__txid<ID>]}, but if \code{FALSE}, we use \code{txid<ID>}.
#' Default: \code{FALSE}
@@ -91,8 +91,8 @@ ncbi_searcher <- function(taxa = NULL, id = NULL, seqrange="1:3000", getrelated=
}

# Constants --------------------------------------------------------------------------------------
url_esearch <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
url_esummary <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
url_esearch <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
url_esummary <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"

# Function to process queries one at a time ------------------------------------------------------
ncbi_searcher_foo <- function(xx, getrelated, verbose, seqrange, entrez_query, fuzzy, limit, hypothetical, ...) {
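And a small usage sketch for the constants above: an exact-ID search of the txid<ID> form mentioned in the fuzzy docs, combined with an Entrez-format filter of the kind the entrez_query help links to; the taxon ID, the [Organism] field tag, and the length filter are examples, not package defaults:

library(httr)
library(xml2)

url_esearch <- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

# exact taxon-ID search (txid9606 = Homo sapiens) plus a sequence-length filter
term <- "txid9606[Organism] AND 1:3000[SLEN]"
res  <- GET(url_esearch, query = list(db = "nuccore", term = term, RetMax = 100))
xml_text(xml_find_all(read_xml(content(res, "text", encoding = "UTF-8")),
                      "//IdList/Id"))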
2 changes: 1 addition & 1 deletion man/ncbi_searcher.Rd

This generated file is not rendered by default.
