Skip to content

Commit

Permalink
Fixes some troubles regarding rfc2822 date parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Riduidel committed Sep 8, 2019
1 parent c67c2e5 commit 797deb6
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 13 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ treexml = "0.7"
atom_syndication = "0.6"
rss = "1.8"
chrono = { version = "0.4", features = ["serde"] }
rfc822_sanitizer = "0.3.2"
# A good html parser to allow generation of valid mail messages
# kuchiki is in fact a layer over html5ever, which adds easier tree traversal/transformation
kuchiki = "0.7"
Expand Down
31 changes: 18 additions & 13 deletions src/feed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,7 @@ fn extract_authors_from_rss(entry: &RssItem, feed: &RssChannel) -> Vec<String> {
Some(l) => message_authors = vec![l.to_owned()],
_ => message_authors = vec![feed.title().to_owned()],
}
message_authors = message_authors
.iter()
.map(|author| (author, author.replace(" ", "_")))
.map(|tuple| format!("{} <{}@{}>", tuple.0, tuple.1, domain))
.collect();
message_authors
sanitize_message_authors(message_authors, domain)
}
fn find_rss_domain(feed: &RssChannel) -> String {
return Some(feed.link())
Expand Down Expand Up @@ -230,12 +225,11 @@ fn extract_from_rss(entry: &RssItem, feed: &RssChannel) -> Message {

fn extract_date_from_rss(entry: &RssItem) -> NaiveDateTime {
if entry.pub_date().is_some() {
let mut pub_date = str::replace(entry.pub_date().unwrap(), "-0000", "+0000");
pub_date = str::replace(&pub_date, "+00:00", "+0000");
return DateTime::parse_from_rfc2822(&pub_date)
let pub_date = entry.pub_date().unwrap().to_owned();
return rfc822_sanitizer::parse_from_rfc2822_with_fallback(&pub_date)
.unwrap_or_else(|e| {
panic!(
"pub_date for item {:?} (value is {:?}) can't be parsed. {:?}",
"pub_date for item {:?} (value is {:?}) can't be parsed as rfc2822. {:?}",
&entry, pub_date, e
)
})
Expand Down Expand Up @@ -272,12 +266,23 @@ fn extract_authors_from_atom(entry: &AtomEntry, feed: &AtomFeed) -> Vec<String>
if message_authors.is_empty() {
message_authors = vec![feed.title().to_owned()]
}
message_authors = message_authors
sanitize_message_authors(message_authors, domain)
}

/// Turns author display names into `Name <local_part@domain>` mailbox strings.
///
/// The display name is kept exactly as given. The local part of the address is
/// derived from it by lowercasing, replacing spaces with `_`, spelling `&` as
/// `and`, removing each of the punctuation characters `,`, `;`, `:` and `!`,
/// and replacing `ï` with `i`.
///
/// * `message_authors` - author display names, one generated mailbox per entry
/// * `domain` - text placed after the `@` of every generated address
///
/// Returns one formatted string per input author, in input order.
fn sanitize_message_authors(message_authors: Vec<String>, domain: String) -> Vec<String> {
    message_authors
        .iter()
        .map(|author| {
            // The generated user address; the display name itself stays untouched.
            let local_part = author
                .to_lowercase()
                .replace(" ", "_")
                .replace("&", "and")
                // A char-slice pattern strips each character on its own, whereas the
                // string pattern ",;:!" would only match that exact sequence.
                .replace(&[',', ';', ':', '!'][..], "")
                .replace("ï", "i");
            format!("{} <{}@{}>", author, local_part, domain)
        })
        .collect()
}

fn find_atom_domain(feed: &AtomFeed) -> String {
Expand Down
1 change: 1 addition & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ extern crate flexi_logger;
extern crate treexml;

extern crate chrono;
extern crate rfc822_sanitizer;

#[macro_use]
extern crate tera;
Expand Down

0 comments on commit 797deb6

Please sign in to comment.