From 3506812f365af495823fd7ee76acc415d7e10c4d Mon Sep 17 00:00:00 2001 From: ClementCadieux Date: Tue, 4 Apr 2023 15:13:11 +0000 Subject: [PATCH 1/5] =?UTF-8?q?Corrig=C3=A9=20l'erreur=20avec=20interventi?= =?UTF-8?q?on=5Fcount?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scrapers/parliament-mash/quebec-debates.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/parliament-mash/quebec-debates.R b/scrapers/parliament-mash/quebec-debates.R index 453a5b0..89120d1 100644 --- a/scrapers/parliament-mash/quebec-debates.R +++ b/scrapers/parliament-mash/quebec-debates.R @@ -467,7 +467,7 @@ for (i in 1:length(list_urls)) { intervention_text <- substr(doc_text[j], unlist(gregexpr(":", paragraph_start))+1, nchar(doc_text[j])) intervention_text <- gsub("(?<=[\\s])\\s*|^\\s+|\\s+$", "", intervention_text, perl=TRUE) matching_row <- NULL - intervention_count <<- intervention_count - 1 + intervention_count <<- intervention_count + 1 next } From 4d8edc1c579e04945a9e2778d351e9acaa00da6c Mon Sep 17 00:00:00 2001 From: ClementCadieux Date: Tue, 4 Apr 2023 17:39:32 +0000 Subject: [PATCH 2/5] =?UTF-8?q?Retour=20=C3=A0=20l'=C3=A9tat=20initial?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scrapers/parliament-mash/quebec-debates.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/parliament-mash/quebec-debates.R b/scrapers/parliament-mash/quebec-debates.R index 89120d1..453a5b0 100644 --- a/scrapers/parliament-mash/quebec-debates.R +++ b/scrapers/parliament-mash/quebec-debates.R @@ -467,7 +467,7 @@ for (i in 1:length(list_urls)) { intervention_text <- substr(doc_text[j], unlist(gregexpr(":", paragraph_start))+1, nchar(doc_text[j])) intervention_text <- gsub("(?<=[\\s])\\s*|^\\s+|\\s+$", "", intervention_text, perl=TRUE) matching_row <- NULL - intervention_count <<- intervention_count + 1 + intervention_count <<- intervention_count - 1 next } From de9f3c5bc8621de12e5e706e3640ce1832161dae Mon Sep 17 00:00:00 2001 From: ClementCadieux Date: Tue, 4 Apr 2023 17:57:03 +0000 Subject: [PATCH 3/5] Tentative de solutions --- scrapers/parliament-mash/quebec-debates.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/scrapers/parliament-mash/quebec-debates.R b/scrapers/parliament-mash/quebec-debates.R index 453a5b0..88ce29c 100644 --- a/scrapers/parliament-mash/quebec-debates.R +++ b/scrapers/parliament-mash/quebec-debates.R @@ -25,7 +25,7 @@ library(dplyr) status <<- 0 debate_count <<- 0 -intervention_count <<- 0 +total_intervention_count <<- 0 final_message <<- "" @@ -158,6 +158,7 @@ list_urls <- doc_urls[grep("assemblee-nationale/4\\d-\\d/journal-debats", doc_ur # for (i in 1:length(list_urls)) { + intervention_count <<- 0 #if (opt$hub_mode != "skip") clessnhub::refresh_token(configuration$token, configuration$url) if (opt$hub_mode != "skip") clessnhub::connect_with_token(Sys.getenv('HUB_TOKEN')) @@ -886,14 +887,17 @@ for (i in 1:length(list_urls)) { } #if (grepl("actualites-salle-presse", event_url)) - if (intervention_count > 1) debate_count <- debate_count + 1 + if (intervention_count > 1) { + debate_count <- debate_count + 1 + total_intervention_count <<- total_intervention_count + intervention_count + } } #for (i in 1:nrow(result)) -if (intervention_count < 0) intervention_count <- 0 +if (total_intervention_count < 0) total_intervention_count <- 0 clessnverse::logit(scriptname, final_message, logger) -clessnverse::logit(scriptname, paste(debate_count, "debates were added to the hub totalling", intervention_count, "interventions"), logger) +clessnverse::logit(scriptname, paste(debate_count, "debates were added to the hub totalling", total_intervention_count, "interventions"), logger) clessnverse::logit(scriptname, paste("reaching end of", scriptname, "script"), logger = logger) logger <- clessnverse::logclose(logger) quit(save="no", status = status) From e58d71193f79e04c6b06973443aab78dd680a577 Mon Sep 17 00:00:00 2001 From: ClementCadieux Date: Thu, 6 Apr 2023 14:49:49 +0000 Subject: [PATCH 4/5] Added a log at the end --- scrapers/parliament-mash/quebec-debates.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/parliament-mash/quebec-debates.R b/scrapers/parliament-mash/quebec-debates.R index 88ce29c..3593496 100644 --- a/scrapers/parliament-mash/quebec-debates.R +++ b/scrapers/parliament-mash/quebec-debates.R @@ -886,7 +886,7 @@ for (i in 1:length(list_urls)) { } # version finale } #if (grepl("actualites-salle-presse", event_url)) - + clessnverse::logit(scriptname, paste("intervention_count: ", intervention_count), logger) if (intervention_count > 1) { debate_count <- debate_count + 1 total_intervention_count <<- total_intervention_count + intervention_count From 187d758821cea89dae943c2a352e8c6ff599edb4 Mon Sep 17 00:00:00 2001 From: ClementCadieux Date: Thu, 6 Apr 2023 20:36:18 +0000 Subject: [PATCH 5/5] =?UTF-8?q?Comptes=20les=20interventions=20skipp=C3=A9?= =?UTF-8?q?es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scrapers/parliament-mash/quebec-debates.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scrapers/parliament-mash/quebec-debates.R b/scrapers/parliament-mash/quebec-debates.R index 3593496..7ba5e5d 100644 --- a/scrapers/parliament-mash/quebec-debates.R +++ b/scrapers/parliament-mash/quebec-debates.R @@ -159,6 +159,7 @@ list_urls <- doc_urls[grep("assemblee-nationale/4\\d-\\d/journal-debats", doc_ur for (i in 1:length(list_urls)) { intervention_count <<- 0 + skipped_intervention <- 0 #if (opt$hub_mode != "skip") clessnhub::refresh_token(configuration$token, configuration$url) if (opt$hub_mode != "skip") clessnhub::connect_with_token(Sys.getenv('HUB_TOKEN')) @@ -401,6 +402,7 @@ for (i in 1:length(list_urls)) { event_sentence_count <- clessnverse::countVecSentences(doc_text) - 1 speech_paragraph_count <- 0 + skipped_intervention <- 0 for (j in 1:length(doc_text)) { #cat(j, intervention_count, "\r") cat(intervention_count, "\r") @@ -469,6 +471,7 @@ for (i in 1:length(list_urls)) { intervention_text <- gsub("(?<=[\\s])\\s*|^\\s+|\\s+$", "", intervention_text, perl=TRUE) matching_row <- NULL intervention_count <<- intervention_count - 1 + skipped_intervention <<- skipped_intervention + 1 next } @@ -887,6 +890,8 @@ for (i in 1:length(list_urls)) { } #if (grepl("actualites-salle-presse", event_url)) clessnverse::logit(scriptname, paste("intervention_count: ", intervention_count), logger) + clessnverse::logit(scriptname, paste("Skipped: ", skipped_intervention), logger) + if (intervention_count > 1) { debate_count <- debate_count + 1 total_intervention_count <<- total_intervention_count + intervention_count