From 797deb6457695d79c57e96930dd37f2672e7a94f Mon Sep 17 00:00:00 2001 From: Nicolas Delsaux Date: Sun, 8 Sep 2019 17:46:16 +0200 Subject: [PATCH] Fixes some troubles regarding rfc2822 date parsing --- Cargo.toml | 1 + src/feed.rs | 31 ++++++++++++++++++------------- src/main.rs | 1 + 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5c9ca33..88366d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ treexml = "0.7" atom_syndication = "0.6" rss = "1.8" chrono = { version = "0.4", features = ["serde"] } +rfc822_sanitizer = "0.3.2" # A good html parser to allow generation of valid mail messages # kuchiki is in fact a layer over html5ever, which adds easier tree traversal/transformation kuchiki = "0.7" diff --git a/src/feed.rs b/src/feed.rs index 49d075c..13bf5ef 100644 --- a/src/feed.rs +++ b/src/feed.rs @@ -179,12 +179,7 @@ fn extract_authors_from_rss(entry: &RssItem, feed: &RssChannel) -> Vec { Some(l) => message_authors = vec![l.to_owned()], _ => message_authors = vec![feed.title().to_owned()], } - message_authors = message_authors - .iter() - .map(|author| (author, author.replace(" ", "_"))) - .map(|tuple| format!("{} <{}@{}>", tuple.0, tuple.1, domain)) - .collect(); - message_authors + sanitize_message_authors(message_authors, domain) } fn find_rss_domain(feed: &RssChannel) -> String { return Some(feed.link()) @@ -230,12 +225,11 @@ fn extract_from_rss(entry: &RssItem, feed: &RssChannel) -> Message { fn extract_date_from_rss(entry: &RssItem) -> NaiveDateTime { if entry.pub_date().is_some() { - let mut pub_date = str::replace(entry.pub_date().unwrap(), "-0000", "+0000"); - pub_date = str::replace(&pub_date, "+00:00", "+0000"); - return DateTime::parse_from_rfc2822(&pub_date) + let pub_date = entry.pub_date().unwrap().to_owned(); + return rfc822_sanitizer::parse_from_rfc2822_with_fallback(&pub_date) .unwrap_or_else(|e| { panic!( - "pub_date for item {:?} (value is {:?}) can't be parsed. {:?}", + "pub_date for item {:?} (value is {:?}) can't be parsed as rfc2822. {:?}", &entry, pub_date, e ) }) @@ -272,12 +266,23 @@ fn extract_authors_from_atom(entry: &AtomEntry, feed: &AtomFeed) -> Vec if message_authors.is_empty() { message_authors = vec![feed.title().to_owned()] } - message_authors = message_authors + sanitize_message_authors(message_authors, domain) +} + +fn sanitize_message_authors(message_authors:Vec, domain:String)->Vec { + let fixed = message_authors .iter() - .map(|author| (author, author.replace(" ", "_"))) + // ni next line, we create a tuple to be used to generate the email address + .map(|author| (author, // first element of tuple is email displayed name + author.to_lowercase() // second element of tuple is generated user address + .replace(" ", "_") + .replace("&", "and") + .replace(",;:!", "") + .replace("ï", "i") + )) .map(|tuple| format!("{} <{}@{}>", tuple.0, tuple.1, domain)) .collect(); - message_authors + return fixed; } fn find_atom_domain(feed: &AtomFeed) -> String { diff --git a/src/main.rs b/src/main.rs index 35f478e..22bd630 100644 --- a/src/main.rs +++ b/src/main.rs @@ -123,6 +123,7 @@ extern crate flexi_logger; extern crate treexml; extern crate chrono; +extern crate rfc822_sanitizer; #[macro_use] extern crate tera;