From ae3105dad9c618af79278fc28b1a90d1a906f6a6 Mon Sep 17 00:00:00 2001
From: Elena Krismer <70535771+elena-krismer@users.noreply.github.com>
Date: Thu, 17 Oct 2024 17:54:22 +0200
Subject: [PATCH 01/11] fix cran malformat error

---
 R/try_query.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/R/try_query.R b/R/try_query.R
index 8016a00b..42798def 100644
--- a/R/try_query.R
+++ b/R/try_query.R
@@ -31,6 +31,12 @@ try_query <-
       stop("The timeout cannot be less than 1 second.")
     }
 
+    # Check if the URL is valid
+    parsed_url <- httr::parse_url(url)
+    if (is.null(parsed_url$scheme) || is.null(parsed_url$hostname)) {
+      stop("The provided URL is not valid. Please check the format.")
+    }
+
     # Check if there is an internet connection first
     if (!curl::has_internet()) {
       if (!silent) message("\nNo internet connection.")

From fde317e250184c4d41c7b25bb9f413c284c749d0 Mon Sep 17 00:00:00 2001
From: Elena Krismer <70535771+elena-krismer@users.noreply.github.com>
Date: Thu, 17 Oct 2024 17:56:50 +0200
Subject: [PATCH 02/11] try_query update add to news

---
 NEWS.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index f947cda2..be684c68 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+# protti ***
+
+## Bug fixes
+* `try_query()` now checks if a valid URL is provided before attempting to download data. 
+
 # protti 0.9.0
 
 ## New features 

From 57a64475409bd95402e2e558efc5b389c6cca751 Mon Sep 17 00:00:00 2001
From: jpquast <janphilipp.quast@gmail.com>
Date: Sun, 29 Sep 2024 20:30:24 +0200
Subject: [PATCH 03/11] fix vroom problem

The issue was that the UniProt data now seems to be gzipped. I now handle this case in try_query. I am not sure whether this covers every potential case, but it at least works for UniProt.
---
 DESCRIPTION      |  2 +-
 NAMESPACE        |  4 ++++
 R/try_query.R    | 35 +++++++++++++++++++++++++++++++++--
 man/try_query.Rd |  3 +--
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 0faf4c66..d2b5740d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -43,7 +43,7 @@ Imports:
     methods,
     R.utils,
     stats
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Suggests: 
     testthat,
     covr,
diff --git a/NAMESPACE b/NAMESPACE
index 09754811..d81a3a5d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -116,6 +116,7 @@ importFrom(httr,modify_url)
 importFrom(httr,timeout)
 importFrom(janitor,clean_names)
 importFrom(janitor,make_clean_names)
+importFrom(jsonlite,fromJSON)
 importFrom(magrittr,"%>%")
 importFrom(methods,is)
 importFrom(plotly,ggplotly)
@@ -134,6 +135,7 @@ importFrom(purrr,pluck)
 importFrom(purrr,pmap)
 importFrom(purrr,reduce)
 importFrom(purrr,set_names)
+importFrom(readr,read_csv)
 importFrom(readr,read_tsv)
 importFrom(readr,write_csv)
 importFrom(readr,write_tsv)
@@ -191,3 +193,5 @@ importFrom(utils,data)
 importFrom(utils,download.file)
 importFrom(utils,head)
 importFrom(utils,untar)
+importFrom(xml2,read_html)
+importFrom(xml2,read_xml)
diff --git a/R/try_query.R b/R/try_query.R
index 42798def..27079134 100644
--- a/R/try_query.R
+++ b/R/try_query.R
@@ -13,7 +13,6 @@
 #' @param type a character value that specifies the type of data at the target URL. Options are
 #' all options that can be supplied to httr::content, these include e.g.
 #' "text/tab-separated-values", "application/json" and "txt/csv". Default is "text/tab-separated-values".
-#' Default is "tab-separated-values".
 #' @param timeout a numeric value that specifies the maximum request time. Default is 60 seconds.
 #' @param accept a character value that specifies the type of data that should be sent by the API if
 #' it uses content negotiation. The default is NULL and it should only be set for APIs that use
@@ -22,6 +21,9 @@
 #'
 #' @importFrom curl has_internet
 #' @importFrom httr GET timeout http_error message_for_status http_status content accept
+#' @importFrom readr read_tsv read_csv
+#' @importFrom jsonlite fromJSON
+#' @importFrom xml2 read_html read_xml
 #'
 #' @return A data frame that contains the table from the url.
 try_query <-
@@ -94,7 +96,36 @@ try_query <-
     # Change variable to not show progress if readr is used
     options(readr.show_progress = FALSE)
 
-    result <- suppressMessages(httr::content(query_result, type = type, encoding = "UTF-8", ...))
+    # Check if the content is gzip compressed
+    if (query_result$headers[["content-encoding"]] == "gzip") {
+      # Retrieve the content as raw bytes using httr::content
+      raw_content <- httr::content(query_result, type = "raw")
+
+      # Decompress the raw content using base R's `memDecompress`
+      decompressed_content <- memDecompress(raw_content, type = "gzip")
+
+      # Convert the raw bytes to a character string
+      text_content <- rawToChar(decompressed_content)
+
+      # Read the decompressed content based on the specified type
+      if (type == "text/tab-separated-values") {
+        result <- readr::read_tsv(text_content, ...)
+      } else if (type == "text/html") {
+        result <- xml2::read_html(text_content, ...)
+      } else if (type == "text/xml") {
+        result <- xml2::read_xml(text_content, ...)
+      } else if (type == "text/csv" || type == "txt/csv") {
+        result <- readr::read_csv(text_content, ...)
+      } else if (type == "application/json") {
+        result <- jsonlite::fromJSON(text_content, ...)  # Using jsonlite for JSON parsing
+      } else if (type == "text") {
+        result <- text_content  # Return raw text as-is
+      } else {
+        stop("Unsupported content type: ", type)
+      }
+    } else {
+      result <- suppressMessages(httr::content(query_result, type = type, encoding = "UTF-8", ...))
+    }
 
     return(result)
   }
diff --git a/man/try_query.Rd b/man/try_query.Rd
index 90d61467..cb9a64aa 100644
--- a/man/try_query.Rd
+++ b/man/try_query.Rd
@@ -26,8 +26,7 @@ that failed.}
 
 \item{type}{a character value that specifies the type of data at the target URL. Options are
 all options that can be supplied to httr::content, these include e.g.
-"text/tab-separated-values", "application/json" and "txt/csv". Default is "text/tab-separated-values".
-Default is "tab-separated-values".}
+"text/tab-separated-values", "application/json" and "txt/csv". Default is "text/tab-separated-values".}
 
 \item{timeout}{a numeric value that specifies the maximum request time. Default is 60 seconds.}
 

From badce14f92cbadba787c3073221fbb221d7c5449 Mon Sep 17 00:00:00 2001
From: jpquast <janphilipp.quast@gmail.com>
Date: Sun, 29 Sep 2024 21:39:54 +0200
Subject: [PATCH 04/11] Add xml2 and jsonlite to suggests

---
 DESCRIPTION | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index d2b5740d..81db7349 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -67,7 +67,9 @@ Suggests:
     iq,
     scales,
     farver,
-    ggforce
+    ggforce,
+    xml2,
+    jsonlite
 Depends: 
     R (>= 4.0)
 URL: https://github.com/jpquast/protti, https://jpquast.github.io/protti/

From d4674f065f3dbeab6dc1e3735c4bec11e44ddd71 Mon Sep 17 00:00:00 2001
From: jpquast <janphilipp.quast@gmail.com>
Date: Sun, 29 Sep 2024 22:03:37 +0200
Subject: [PATCH 05/11] Fixed another bug in try_query

---
 R/try_query.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/try_query.R b/R/try_query.R
index 27079134..eb3ad46b 100644
--- a/R/try_query.R
+++ b/R/try_query.R
@@ -97,7 +97,7 @@ try_query <-
     options(readr.show_progress = FALSE)
 
     # Check if the content is gzip compressed
-    if (query_result$headers[["content-encoding"]] == "gzip") {
+    if (!is.null(query_result$headers[["content-encoding"]]) && query_result$headers[["content-encoding"]] == "gzip") {
       # Retrieve the content as raw bytes using httr::content
       raw_content <- httr::content(query_result, type = "raw")
 

From 0c6fe0dac48aacde960de6f0d5271ba93806887b Mon Sep 17 00:00:00 2001
From: elena-krismer <elena-krismer@users.noreply.github.com>
Date: Thu, 17 Oct 2024 16:44:19 +0000
Subject: [PATCH 06/11] Style code (GHA)

---
 R/try_query.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/try_query.R b/R/try_query.R
index eb3ad46b..de4ce8d7 100644
--- a/R/try_query.R
+++ b/R/try_query.R
@@ -117,9 +117,9 @@ try_query <-
       } else if (type == "text/csv" || type == "txt/csv") {
         result <- readr::read_csv(text_content, ...)
       } else if (type == "application/json") {
-        result <- jsonlite::fromJSON(text_content, ...)  # Using jsonlite for JSON parsing
+        result <- jsonlite::fromJSON(text_content, ...) # Using jsonlite for JSON parsing
       } else if (type == "text") {
-        result <- text_content  # Return raw text as-is
+        result <- text_content # Return raw text as-is
       } else {
         stop("Unsupported content type: ", type)
       }

From b66dafbfd520f895f9a407200de7cbf8e53cd98a Mon Sep 17 00:00:00 2001
From: Elena Krismer <70535771+elena-krismer@users.noreply.github.com>
Date: Fri, 18 Oct 2024 15:49:45 +0200
Subject: [PATCH 07/11] fix decompression

---
 R/try_query.R | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/R/try_query.R b/R/try_query.R
index de4ce8d7..3f7b3a50 100644
--- a/R/try_query.R
+++ b/R/try_query.R
@@ -96,11 +96,13 @@ try_query <-
     # Change variable to not show progress if readr is used
     options(readr.show_progress = FALSE)
 
-    # Check if the content is gzip compressed
-    if (!is.null(query_result$headers[["content-encoding"]]) && query_result$headers[["content-encoding"]] == "gzip") {
-      # Retrieve the content as raw bytes using httr::content
-      raw_content <- httr::content(query_result, type = "raw")
+    # Retrieve the content as raw bytes using httr::content
+    raw_content <- httr::content(query_result, type = "raw")
+    # Check for gzip magic number (1f 8b) before decompression
+    compressed <- length(raw_content) >= 2 && raw_content[1] == as.raw(0x1f) && raw_content[2] == as.raw(0x8b)
 
+    # Check if the content is gzip compressed
+    if (!is.null(query_result$headers[["content-encoding"]]) && query_result$headers[["content-encoding"]] == "gzip" && compressed) {
       # Decompress the raw content using base R's `memDecompress`
       decompressed_content <- memDecompress(raw_content, type = "gzip")
 

From ea69a89b4a55ad59479eee6195b37af58dcc41a3 Mon Sep 17 00:00:00 2001
From: Elena Krismer <70535771+elena-krismer@users.noreply.github.com>
Date: Fri, 18 Oct 2024 18:35:21 +0200
Subject: [PATCH 08/11] move xml2 and jsonlite to imports

---
 DESCRIPTION | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 81db7349..55458e24 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -42,7 +42,9 @@ Imports:
     httr,
     methods,
     R.utils,
-    stats
+    stats,
+    xml2,
+    jsonlite
 RoxygenNote: 7.3.2
 Suggests: 
     testthat,
@@ -67,9 +69,7 @@ Suggests:
     iq,
     scales,
     farver,
-    ggforce,
-    xml2,
-    jsonlite
+    ggforce
 Depends: 
     R (>= 4.0)
 URL: https://github.com/jpquast/protti, https://jpquast.github.io/protti/

From 33cc8ee35286a5be808b30852045f83d9458edf1 Mon Sep 17 00:00:00 2001
From: jpquast <janphilipp.quast@gmail.com>
Date: Sat, 19 Oct 2024 12:15:14 +0200
Subject: [PATCH 09/11] Corrected try_query error handling

---
 DESCRIPTION   | 10 +++++-----
 NAMESPACE     |  3 ---
 NEWS.md       |  2 +-
 R/try_query.R | 17 ++++++++---------
 4 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 55458e24..5a443cf7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: protti
 Title: Bottom-Up Proteomics and LiP-MS Quality Control and Data Analysis Tools
-Version: 0.9.0
+Version: 0.9.0.9000
 Authors@R: 
     c(person(given = "Jan-Philipp",
            family = "Quast",
@@ -42,9 +42,7 @@ Imports:
     httr,
     methods,
     R.utils,
-    stats,
-    xml2,
-    jsonlite
+    stats
 RoxygenNote: 7.3.2
 Suggests: 
     testthat,
@@ -69,7 +67,9 @@ Suggests:
     iq,
     scales,
     farver,
-    ggforce
+    ggforce,
+    xml2,
+    jsonlite
 Depends: 
     R (>= 4.0)
 URL: https://github.com/jpquast/protti, https://jpquast.github.io/protti/
diff --git a/NAMESPACE b/NAMESPACE
index d81a3a5d..e4e57730 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -116,7 +116,6 @@ importFrom(httr,modify_url)
 importFrom(httr,timeout)
 importFrom(janitor,clean_names)
 importFrom(janitor,make_clean_names)
-importFrom(jsonlite,fromJSON)
 importFrom(magrittr,"%>%")
 importFrom(methods,is)
 importFrom(plotly,ggplotly)
@@ -193,5 +192,3 @@ importFrom(utils,data)
 importFrom(utils,download.file)
 importFrom(utils,head)
 importFrom(utils,untar)
-importFrom(xml2,read_html)
-importFrom(xml2,read_xml)
diff --git a/NEWS.md b/NEWS.md
index be684c68..1c25e436 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,7 +1,7 @@
 # protti ***
 
 ## Bug fixes
-* `try_query()` now checks if a valid URL is provided before attempting to download data. 
+* `try_query()` now correctly handles errors that don't return a response object. Gzip decompression is also handled more robustly, since compressed responses from some databases were previously not handled correctly.
 
 # protti 0.9.0
 
diff --git a/R/try_query.R b/R/try_query.R
index 3f7b3a50..77b4403f 100644
--- a/R/try_query.R
+++ b/R/try_query.R
@@ -22,8 +22,6 @@
 #' @importFrom curl has_internet
 #' @importFrom httr GET timeout http_error message_for_status http_status content accept
 #' @importFrom readr read_tsv read_csv
-#' @importFrom jsonlite fromJSON
-#' @importFrom xml2 read_html read_xml
 #'
 #' @return A data frame that contains the table from the url.
 try_query <-
@@ -33,12 +31,6 @@ try_query <-
       stop("The timeout cannot be less than 1 second.")
     }
 
-    # Check if the URL is valid
-    parsed_url <- httr::parse_url(url)
-    if (is.null(parsed_url$scheme) || is.null(parsed_url$hostname)) {
-      stop("The provided URL is not valid. Please check the format.")
-    }
-
     # Check if there is an internet connection first
     if (!curl::has_internet()) {
       if (!silent) message("\nNo internet connection.")
@@ -85,11 +77,18 @@ try_query <-
       return(invisible("No internet connection"))
     }
 
-    if (httr::http_error(query_result)) {
+    # If response was an error return that error message
+    if (inherits(query_result, "response") && httr::http_error(query_result)) {
       if (!silent) httr::message_for_status(query_result)
       return(invisible(httr::http_status(query_result)$message))
     }
 
+    # Handle other types of errors separately from query errors
+    if(inherits(query_result, "character")) {
+      if (!silent) message(query_result)
+      return(invisible(query_result))
+    }
+
     # Record readr progress variable to set back later
     readr_show_progress <- getOption("readr.show_progress")
     on.exit(options(readr.show_progress = readr_show_progress))

From 02d01a2fcce071fda0ed465655fb30535310a38f Mon Sep 17 00:00:00 2001
From: jpquast <jpquast@users.noreply.github.com>
Date: Sat, 19 Oct 2024 10:17:15 +0000
Subject: [PATCH 10/11] Style code (GHA)

---
 R/try_query.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/try_query.R b/R/try_query.R
index 77b4403f..2199c32b 100644
--- a/R/try_query.R
+++ b/R/try_query.R
@@ -84,7 +84,7 @@ try_query <-
     }
 
     # Handle other types of errors separately from query errors
-    if(inherits(query_result, "character")) {
+    if (inherits(query_result, "character")) {
       if (!silent) message(query_result)
       return(invisible(query_result))
     }

From a64c5c5538d94a8fa003711466484a7bbae41633 Mon Sep 17 00:00:00 2001
From: jpquast <janphilipp.quast@gmail.com>
Date: Sat, 19 Oct 2024 12:57:39 +0200
Subject: [PATCH 11/11] Fix mobidb test

---
 tests/testthat/test-fetch_extract_and_enrichment_functions.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-fetch_extract_and_enrichment_functions.R b/tests/testthat/test-fetch_extract_and_enrichment_functions.R
index 6f420b21..d24ea4f3 100644
--- a/tests/testthat/test-fetch_extract_and_enrichment_functions.R
+++ b/tests/testthat/test-fetch_extract_and_enrichment_functions.R
@@ -21,7 +21,7 @@ if (Sys.getenv("TEST_PROTTI") == "true") {
     unis <- c("iRT", "P25437", "P30870", "P0A6P9")
     expect_warning(mobidb <- fetch_mobidb(unis))
     expect_is(mobidb, "data.frame")
-    expect_equal(nrow(mobidb), 259)
+    expect_equal(nrow(mobidb), 221)
     expect_equal(ncol(mobidb), 6)
   })