diff --git a/scrapers/parliament-mash/quebec-debates.R b/scrapers/parliament-mash/quebec-debates.R index 453a5b0..7ba5e5d 100644 --- a/scrapers/parliament-mash/quebec-debates.R +++ b/scrapers/parliament-mash/quebec-debates.R @@ -25,7 +25,7 @@ library(dplyr) status <<- 0 debate_count <<- 0 -intervention_count <<- 0 +total_intervention_count <<- 0 final_message <<- "" @@ -158,6 +158,8 @@ list_urls <- doc_urls[grep("assemblee-nationale/4\\d-\\d/journal-debats", doc_ur # for (i in 1:length(list_urls)) { + intervention_count <<- 0 + skipped_intervention <- 0 #if (opt$hub_mode != "skip") clessnhub::refresh_token(configuration$token, configuration$url) if (opt$hub_mode != "skip") clessnhub::connect_with_token(Sys.getenv('HUB_TOKEN')) @@ -400,6 +402,7 @@ for (i in 1:length(list_urls)) { event_sentence_count <- clessnverse::countVecSentences(doc_text) - 1 speech_paragraph_count <- 0 + skipped_intervention <- 0 for (j in 1:length(doc_text)) { #cat(j, intervention_count, "\r") cat(intervention_count, "\r") @@ -468,6 +471,7 @@ for (i in 1:length(list_urls)) { intervention_text <- gsub("(?<=[\\s])\\s*|^\\s+|\\s+$", "", intervention_text, perl=TRUE) matching_row <- NULL intervention_count <<- intervention_count - 1 + skipped_intervention <<- skipped_intervention + 1 next } @@ -885,15 +889,20 @@ for (i in 1:length(list_urls)) { } # version finale } #if (grepl("actualites-salle-presse", event_url)) - - if (intervention_count > 1) debate_count <- debate_count + 1 + clessnverse::logit(scriptname, paste("intervention_count: ", intervention_count), logger) + clessnverse::logit(scriptname, paste("Skipped: ", skipped_intervention), logger) + + if (intervention_count > 1) { + debate_count <- debate_count + 1 + total_intervention_count <<- total_intervention_count + intervention_count + } } #for (i in 1:nrow(result)) -if (intervention_count < 0) intervention_count <- 0 +if (total_intervention_count < 0) total_intervention_count <- 0 clessnverse::logit(scriptname, final_message, logger) -clessnverse::logit(scriptname, paste(debate_count, "debates were added to the hub totalling", intervention_count, "interventions"), logger) +clessnverse::logit(scriptname, paste(debate_count, "debates were added to the hub totalling", total_intervention_count, "interventions"), logger) clessnverse::logit(scriptname, paste("reaching end of", scriptname, "script"), logger = logger) logger <- clessnverse::logclose(logger) quit(save="no", status = status)