Skip to content

Commit

Permalink
Merge e7e72be into 54724e7
Browse files Browse the repository at this point in the history
  • Loading branch information
cansavvy authored Apr 1, 2022
2 parents 54724e7 + e7e72be commit 755337f
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
1 change: 1 addition & 0 deletions resources/ignore-urls.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
http://www.someurl.html
https://jhudatascience.org/{Course_Name}}
http://www.someurl.html
23 changes: 11 additions & 12 deletions scripts/url-check.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,12 @@ test_url <- function(url) {
get_urls <- function(file) {
# Read in a file and return the urls from it
content <- readLines(file)
content <- grep("http[s]?://|com$|www", content, value = TRUE)
url_pattern <- "http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
content <- grep("http[s]?://", content, value = TRUE)
url_pattern <- "http[s]?://.+?[\"|\\)| |,]"
urls <- stringr::str_extract(content, url_pattern)
urls <- urls[!is.na(urls)]
if (length(urls) > 0 ){
urls <- gsub(")$|)\\.|\\),|\\)|,", "", urls)
urls <- gsub("\\)$|\"|)$", "", urls)
urls_status <- sapply(urls, test_url)
url_df <- data.frame(urls, urls_status, file)
return(url_df)
Expand All @@ -52,25 +53,23 @@ get_urls <- function(file) {
all_urls <- lapply(files, get_urls)

# Combine the per-file results into one data frame
all_urls_df <- dplyr::bind_rows(all_urls)
all_urls_df <- dplyr::bind_rows(all_urls) %>%
dplyr::filter(!is.na(urls))

if (nrow(all_urls_df) > 0) {
all_urls_df <- all_urls_df %>%
failed_urls_df <- all_urls_df %>%
dplyr::filter(urls_status == "failed")
} else {
all_urls_df <- data.frame(errors = NA)
failed_urls_df <- data.frame(errors = NA)
}

all_urls_df <- all_urls_df %>%
failed_urls_df <- failed_urls_df %>%
dplyr::filter(!(urls %in% ignore_urls))

# Print out how many URL check errors
write(nrow(all_urls_df), stdout())

# Save URL check errors to file temporarily
readr::write_tsv(all_urls_df, output_file)
readr::write_tsv(failed_urls_df, output_file)

message(paste0("Saved to: ", output_file))

# Print out how many URL check errors
write(nrow(all_urls_df), stdout())
write(nrow(failed_urls_df), stdout())

0 comments on commit 755337f

Please sign in to comment.