From ae3105dad9c618af79278fc28b1a90d1a906f6a6 Mon Sep 17 00:00:00 2001 From: Elena Krismer <70535771+elena-krismer@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:54:22 +0200 Subject: [PATCH 01/11] fix cran malformat error --- R/try_query.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/try_query.R b/R/try_query.R index 8016a00b..42798def 100644 --- a/R/try_query.R +++ b/R/try_query.R @@ -31,6 +31,12 @@ try_query <- stop("The timeout cannot be less than 1 second.") } + # Check if the URL is valid + parsed_url <- httr::parse_url(url) + if (is.null(parsed_url$scheme) || is.null(parsed_url$hostname)) { + stop("The provided URL is not valid. Please check the format.") + } + # Check if there is an internet connection first if (!curl::has_internet()) { if (!silent) message("\nNo internet connection.") From fde317e250184c4d41c7b25bb9f413c284c749d0 Mon Sep 17 00:00:00 2001 From: Elena Krismer <70535771+elena-krismer@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:56:50 +0200 Subject: [PATCH 02/11] try_query update add to news --- NEWS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index f947cda2..be684c68 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# protti *** + +## Bug fixes +* `try_query()` now checks if a valid URL is provided before attempting to download data. + # protti 0.9.0 ## New features From 57a64475409bd95402e2e558efc5b389c6cca751 Mon Sep 17 00:00:00 2001 From: jpquast Date: Sun, 29 Sep 2024 20:30:24 +0200 Subject: [PATCH 03/11] fix vroom problem The issue was that the uniprot data seems to now be gzipped. I handle this case now in try_query. Not sure if it is generally handled for any potential case but it at least works for uniprot. 
--- DESCRIPTION | 2 +- NAMESPACE | 4 ++++ R/try_query.R | 35 +++++++++++++++++++++++++++++++++-- man/try_query.Rd | 3 +-- 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0faf4c66..d2b5740d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -43,7 +43,7 @@ Imports: methods, R.utils, stats -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Suggests: testthat, covr, diff --git a/NAMESPACE b/NAMESPACE index 09754811..d81a3a5d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -116,6 +116,7 @@ importFrom(httr,modify_url) importFrom(httr,timeout) importFrom(janitor,clean_names) importFrom(janitor,make_clean_names) +importFrom(jsonlite,fromJSON) importFrom(magrittr,"%>%") importFrom(methods,is) importFrom(plotly,ggplotly) @@ -134,6 +135,7 @@ importFrom(purrr,pluck) importFrom(purrr,pmap) importFrom(purrr,reduce) importFrom(purrr,set_names) +importFrom(readr,read_csv) importFrom(readr,read_tsv) importFrom(readr,write_csv) importFrom(readr,write_tsv) @@ -191,3 +193,5 @@ importFrom(utils,data) importFrom(utils,download.file) importFrom(utils,head) importFrom(utils,untar) +importFrom(xml2,read_html) +importFrom(xml2,read_xml) diff --git a/R/try_query.R b/R/try_query.R index 42798def..27079134 100644 --- a/R/try_query.R +++ b/R/try_query.R @@ -13,7 +13,6 @@ #' @param type a character value that specifies the type of data at the target URL. Options are #' all options that can be supplied to httr::content, these include e.g. #' "text/tab-separated-values", "application/json" and "txt/csv". Default is "text/tab-separated-values". -#' Default is "tab-separated-values". #' @param timeout a numeric value that specifies the maximum request time. Default is 60 seconds. #' @param accept a character value that specifies the type of data that should be sent by the API if #' it uses content negotiation. 
The default is NULL and it should only be set for APIs that use @@ -22,6 +21,9 @@ #' #' @importFrom curl has_internet #' @importFrom httr GET timeout http_error message_for_status http_status content accept +#' @importFrom readr read_tsv read_csv +#' @importFrom jsonlite fromJSON +#' @importFrom xml2 read_html read_xml #' #' @return A data frame that contains the table from the url. try_query <- @@ -94,7 +96,36 @@ try_query <- # Change variable to not show progress if readr is used options(readr.show_progress = FALSE) - result <- suppressMessages(httr::content(query_result, type = type, encoding = "UTF-8", ...)) + # Check if the content is gzip compressed + if (query_result$headers[["content-encoding"]] == "gzip") { + # Retrieve the content as raw bytes using httr::content + raw_content <- httr::content(query_result, type = "raw") + + # Decompress the raw content using base R's `memDecompress` + decompressed_content <- memDecompress(raw_content, type = "gzip") + + # Convert the raw bytes to a character string + text_content <- rawToChar(decompressed_content) + + # Read the decompressed content based on the specified type + if (type == "text/tab-separated-values") { + result <- readr::read_tsv(text_content, ...) + } else if (type == "text/html") { + result <- xml2::read_html(text_content, ...) + } else if (type == "text/xml") { + result <- xml2::read_xml(text_content, ...) + } else if (type == "text/csv" || type == "txt/csv") { + result <- readr::read_csv(text_content, ...) + } else if (type == "application/json") { + result <- jsonlite::fromJSON(text_content, ...) 
# Using jsonlite for JSON parsing + } else if (type == "text") { + result <- text_content # Return raw text as-is + } else { + stop("Unsupported content type: ", type) + } + } else { + result <- suppressMessages(httr::content(query_result, type = type, encoding = "UTF-8", ...)) + } return(result) } diff --git a/man/try_query.Rd b/man/try_query.Rd index 90d61467..cb9a64aa 100644 --- a/man/try_query.Rd +++ b/man/try_query.Rd @@ -26,8 +26,7 @@ that failed.} \item{type}{a character value that specifies the type of data at the target URL. Options are all options that can be supplied to httr::content, these include e.g. -"text/tab-separated-values", "application/json" and "txt/csv". Default is "text/tab-separated-values". -Default is "tab-separated-values".} +"text/tab-separated-values", "application/json" and "txt/csv". Default is "text/tab-separated-values".} \item{timeout}{a numeric value that specifies the maximum request time. Default is 60 seconds.} From badce14f92cbadba787c3073221fbb221d7c5449 Mon Sep 17 00:00:00 2001 From: jpquast Date: Sun, 29 Sep 2024 21:39:54 +0200 Subject: [PATCH 04/11] Add xml2 and jsonlite to suggests --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d2b5740d..81db7349 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,7 +67,9 @@ Suggests: iq, scales, farver, - ggforce + ggforce, + xml2, + jsonlite Depends: R (>= 4.0) URL: https://github.com/jpquast/protti, https://jpquast.github.io/protti/ From d4674f065f3dbeab6dc1e3735c4bec11e44ddd71 Mon Sep 17 00:00:00 2001 From: jpquast Date: Sun, 29 Sep 2024 22:03:37 +0200 Subject: [PATCH 05/11] Fixed another bug in try_query --- R/try_query.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/try_query.R b/R/try_query.R index 27079134..eb3ad46b 100644 --- a/R/try_query.R +++ b/R/try_query.R @@ -97,7 +97,7 @@ try_query <- options(readr.show_progress = FALSE) # Check if the content is gzip compressed - if 
(query_result$headers[["content-encoding"]] == "gzip") { + if (!is.null(query_result$headers[["content-encoding"]]) && query_result$headers[["content-encoding"]] == "gzip") { # Retrieve the content as raw bytes using httr::content raw_content <- httr::content(query_result, type = "raw") From 0c6fe0dac48aacde960de6f0d5271ba93806887b Mon Sep 17 00:00:00 2001 From: elena-krismer Date: Thu, 17 Oct 2024 16:44:19 +0000 Subject: [PATCH 06/11] Style code (GHA) --- R/try_query.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/try_query.R b/R/try_query.R index eb3ad46b..de4ce8d7 100644 --- a/R/try_query.R +++ b/R/try_query.R @@ -117,9 +117,9 @@ try_query <- } else if (type == "text/csv" || type == "txt/csv") { result <- readr::read_csv(text_content, ...) } else if (type == "application/json") { - result <- jsonlite::fromJSON(text_content, ...) # Using jsonlite for JSON parsing + result <- jsonlite::fromJSON(text_content, ...) # Using jsonlite for JSON parsing } else if (type == "text") { - result <- text_content # Return raw text as-is + result <- text_content # Return raw text as-is } else { stop("Unsupported content type: ", type) } From b66dafbfd520f895f9a407200de7cbf8e53cd98a Mon Sep 17 00:00:00 2001 From: Elena Krismer <70535771+elena-krismer@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:49:45 +0200 Subject: [PATCH 07/11] fix decompression --- R/try_query.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/try_query.R b/R/try_query.R index de4ce8d7..3f7b3a50 100644 --- a/R/try_query.R +++ b/R/try_query.R @@ -96,11 +96,13 @@ try_query <- # Change variable to not show progress if readr is used options(readr.show_progress = FALSE) - # Check if the content is gzip compressed - if (!is.null(query_result$headers[["content-encoding"]]) && query_result$headers[["content-encoding"]] == "gzip") { - # Retrieve the content as raw bytes using httr::content - raw_content <- httr::content(query_result, type = "raw") + # 
Retrieve the content as raw bytes using httr::content + raw_content <- httr::content(query_result, type = "raw") + # Check for gzip magic number (1f 8b) before decompression + compressed <- length(raw_content) >= 2 && raw_content[1] == as.raw(0x1f) && raw_content[2] == as.raw(0x8b) + # Check if the content is gzip compressed + if (!is.null(query_result$headers[["content-encoding"]]) && query_result$headers[["content-encoding"]] == "gzip" && compressed) { # Decompress the raw content using base R's `memDecompress` decompressed_content <- memDecompress(raw_content, type = "gzip") From ea69a89b4a55ad59479eee6195b37af58dcc41a3 Mon Sep 17 00:00:00 2001 From: Elena Krismer <70535771+elena-krismer@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:35:21 +0200 Subject: [PATCH 08/11] move xml2 and jsonlite to imports --- DESCRIPTION | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 81db7349..55458e24 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,7 +42,9 @@ Imports: httr, methods, R.utils, - stats + stats, + xml2, + jsonlite RoxygenNote: 7.3.2 Suggests: testthat, @@ -67,9 +69,7 @@ Suggests: iq, scales, farver, - ggforce, - xml2, - jsonlite + ggforce Depends: R (>= 4.0) URL: https://github.com/jpquast/protti, https://jpquast.github.io/protti/ From 33cc8ee35286a5be808b30852045f83d9458edf1 Mon Sep 17 00:00:00 2001 From: jpquast Date: Sat, 19 Oct 2024 12:15:14 +0200 Subject: [PATCH 09/11] Corrected try_query error handling --- DESCRIPTION | 10 +++++----- NAMESPACE | 3 --- NEWS.md | 2 +- R/try_query.R | 17 ++++++++--------- 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 55458e24..5a443cf7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: protti Title: Bottom-Up Proteomics and LiP-MS Quality Control and Data Analysis Tools -Version: 0.9.0 +Version: 0.9.0.9000 Authors@R: c(person(given = "Jan-Philipp", family = "Quast", @@ -42,9 +42,7 @@ Imports: httr, 
methods, R.utils, - stats, - xml2, - jsonlite + stats RoxygenNote: 7.3.2 Suggests: testthat, @@ -69,7 +67,9 @@ Suggests: iq, scales, farver, - ggforce + ggforce, + xml2, + jsonlite Depends: R (>= 4.0) URL: https://github.com/jpquast/protti, https://jpquast.github.io/protti/ diff --git a/NAMESPACE b/NAMESPACE index d81a3a5d..e4e57730 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -116,7 +116,6 @@ importFrom(httr,modify_url) importFrom(httr,timeout) importFrom(janitor,clean_names) importFrom(janitor,make_clean_names) -importFrom(jsonlite,fromJSON) importFrom(magrittr,"%>%") importFrom(methods,is) importFrom(plotly,ggplotly) @@ -193,5 +192,3 @@ importFrom(utils,data) importFrom(utils,download.file) importFrom(utils,head) importFrom(utils,untar) -importFrom(xml2,read_html) -importFrom(xml2,read_xml) diff --git a/NEWS.md b/NEWS.md index be684c68..1c25e436 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,7 @@ # protti *** ## Bug fixes -* `try_query()` now checks if a valid URL is provided before attempting to download data. +* `try_query()` now correctly handles errors that don't return a response object. We also handle gzip decompression problems better since some databases compressed responses were not handled correctly. # protti 0.9.0 diff --git a/R/try_query.R b/R/try_query.R index 3f7b3a50..77b4403f 100644 --- a/R/try_query.R +++ b/R/try_query.R @@ -22,8 +22,6 @@ #' @importFrom curl has_internet #' @importFrom httr GET timeout http_error message_for_status http_status content accept #' @importFrom readr read_tsv read_csv -#' @importFrom jsonlite fromJSON -#' @importFrom xml2 read_html read_xml #' #' @return A data frame that contains the table from the url. try_query <- @@ -33,12 +31,6 @@ try_query <- stop("The timeout cannot be less than 1 second.") } - # Check if the URL is valid - parsed_url <- httr::parse_url(url) - if (is.null(parsed_url$scheme) || is.null(parsed_url$hostname)) { - stop("The provided URL is not valid. 
Please check the format.") - } - # Check if there is an internet connection first if (!curl::has_internet()) { if (!silent) message("\nNo internet connection.") @@ -85,11 +77,18 @@ try_query <- return(invisible("No internet connection")) } - if (httr::http_error(query_result)) { + # If response was an error return that error message + if (inherits(query_result, "response") && httr::http_error(query_result)) { if (!silent) httr::message_for_status(query_result) return(invisible(httr::http_status(query_result)$message)) } + # Handle other types of errors separately from query errors + if(inherits(query_result, "character")) { + if (!silent) message(query_result) + return(invisible(query_result)) + } + # Record readr progress variable to set back later readr_show_progress <- getOption("readr.show_progress") on.exit(options(readr.show_progress = readr_show_progress)) From 02d01a2fcce071fda0ed465655fb30535310a38f Mon Sep 17 00:00:00 2001 From: jpquast Date: Sat, 19 Oct 2024 10:17:15 +0000 Subject: [PATCH 10/11] Style code (GHA) --- R/try_query.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/try_query.R b/R/try_query.R index 77b4403f..2199c32b 100644 --- a/R/try_query.R +++ b/R/try_query.R @@ -84,7 +84,7 @@ try_query <- } # Handle other types of errors separately from query errors - if(inherits(query_result, "character")) { + if (inherits(query_result, "character")) { if (!silent) message(query_result) return(invisible(query_result)) } From a64c5c5538d94a8fa003711466484a7bbae41633 Mon Sep 17 00:00:00 2001 From: jpquast Date: Sat, 19 Oct 2024 12:57:39 +0200 Subject: [PATCH 11/11] Fix mobidb test --- tests/testthat/test-fetch_extract_and_enrichment_functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-fetch_extract_and_enrichment_functions.R b/tests/testthat/test-fetch_extract_and_enrichment_functions.R index 6f420b21..d24ea4f3 100644 --- a/tests/testthat/test-fetch_extract_and_enrichment_functions.R +++ 
b/tests/testthat/test-fetch_extract_and_enrichment_functions.R @@ -21,7 +21,7 @@ if (Sys.getenv("TEST_PROTTI") == "true") { unis <- c("iRT", "P25437", "P30870", "P0A6P9") expect_warning(mobidb <- fetch_mobidb(unis)) expect_is(mobidb, "data.frame") - expect_equal(nrow(mobidb), 259) + expect_equal(nrow(mobidb), 221) expect_equal(ncol(mobidb), 6) })