Skip to content

Commit

Permalink
feat(reacherhq#289): add haveibeenpwned check
Browse files Browse the repository at this point in the history
  • Loading branch information
sylvain-reynaud committed Jan 31, 2023
1 parent a4fe57e commit e9e706b
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 10 deletions.
106 changes: 104 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions backend/tests/check_email.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use reacher_backend::routes::create_routes;
use warp::http::StatusCode;
use warp::test::request;

const FOO_BAR_RESPONSE: &str = r#"{"input":"foo@bar","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#;
const FOO_BAR_BAZ_RESPONSE: &str = r#"{"input":"foo@bar.baz","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":"foo@bar.baz","domain":"bar.baz","is_valid_syntax":true,"username":"foo","normalized_email":"foo@bar.baz","suggestion":null}}"#;
const FOO_BAR_RESPONSE: &str = r#"{"input":"foo@bar","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null,"haveibeenpwned_breaches_found":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#;
const FOO_BAR_BAZ_RESPONSE: &str = r#"{"input":"foo@bar.baz","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null,"haveibeenpwned_breaches_found":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":"foo@bar.baz","domain":"bar.baz","is_valid_syntax":true,"username":"foo","normalized_email":"foo@bar.baz","suggestion":null}}"#;

#[tokio::test]
async fn test_input_foo_bar() {
Expand Down
7 changes: 6 additions & 1 deletion cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ pub struct Cli {
#[clap(long, env, default_value = "false", parse(try_from_str))]
pub check_gravatar: bool,

/// HaveIBeenPwned API key. The HaveIBeenPwned check is skipped if not provided.
#[clap(long, env, parse(try_from_str))]
pub haveibeenpwned_api_key: Option<String>,

/// The email to check.
pub to_email: String,
}
Expand All @@ -99,7 +103,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
.set_gmail_use_api(CONF.gmail_use_api)
.set_microsoft365_use_api(CONF.microsoft365_use_api)
.set_check_gravatar(CONF.check_gravatar)
.set_hotmail_use_headless(CONF.hotmail_use_headless.clone());
.set_hotmail_use_headless(CONF.hotmail_use_headless.clone())
.set_haveibeenpwned_api_key(CONF.haveibeenpwned_api_key.clone());

if let Some(proxy_host) = &CONF.proxy_host {
input.set_proxy(CheckEmailInputProxy {
Expand Down
1 change: 1 addition & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ serde_json = "1.0.91"
trust-dns-proto = "0.21.2"
md5 = "0.7.0"
levenshtein = "1.0.5"
pwned = { git = "https://github.com/wisespace-io/pwned-rs.git", rev = "bef23d48c40ab78f59ecc7550e128b88ee55116c" }

[dev-dependencies]
tokio = { version = "1.23.0" }
Expand Down
49 changes: 49 additions & 0 deletions core/src/haveibeenpwned.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// check-if-email-exists
// Copyright (C) 2018-2022 Reacher

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.

// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use crate::util::constants::LOG_TARGET;
use pwned::api::PwnedBuilder;

/// Look up an email address in the HaveIBeenPwned API and count the hits.
///
/// A client is built with the `"reacher"` user agent and the given
/// `api_key`, then `check_email` is called for `to_email`.
///
/// # Returns
///
/// The number of entries returned by the API for this address. Any failure
/// (building the client or performing the request) is logged at error level
/// and reported as `0`, so a result of `0` does not by itself prove that the
/// address is absent from the database.
pub async fn check_haveibeenpwned(to_email: &str, api_key: Option<String>) -> u64 {
    // Building the client used to `unwrap()`, which would panic inside a
    // library call. Handle it like any other lookup failure instead: log the
    // error and report zero breaches.
    let pwned = match PwnedBuilder::default()
        .user_agent("reacher")
        .api_key(api_key)
        .build()
    {
        Ok(client) => client,
        Err(e) => {
            log::error!(
                target: LOG_TARGET,
                "Error while building the HaveIBeenPwned client: {}",
                e
            );
            return 0;
        }
    };

    match pwned.check_email(to_email).await {
        Ok(answer) => {
            let breaches_found = answer.len();
            log::debug!(
                target: LOG_TARGET,
                "Email found in {} breaches",
                breaches_found
            );
            // usize -> u64 is lossless on every target with pointers of at
            // most 64 bits, so this cast cannot truncate.
            breaches_found as u64
        }
        Err(e) => {
            log::error!(
                target: LOG_TARGET,
                "Error while checking if email has been pwned: {}",
                e
            );
            0
        }
    }
}
8 changes: 7 additions & 1 deletion core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
//! ```
pub mod gravatar;
mod haveibeenpwned;
pub mod misc;
pub mod mx;
mod normalize;
Expand Down Expand Up @@ -177,7 +178,12 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
.collect::<Vec<String>>()
);

let my_misc = check_misc(&my_syntax, input.check_gravatar).await;
let my_misc = check_misc(
&my_syntax,
input.check_gravatar,
input.haveibeenpwned_api_key.clone(),
)
.await;
log::debug!(
target: LOG_TARGET,
"[email={}] Found the following misc details: {:?}",
Expand Down
18 changes: 17 additions & 1 deletion core/src/misc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use crate::gravatar::check_gravatar;
use crate::haveibeenpwned::check_haveibeenpwned;

use super::syntax::SyntaxDetails;
use serde::{Deserialize, Serialize};
Expand All @@ -30,6 +31,9 @@ pub struct MiscDetails {
/// Is this email a role-based account?
pub is_role_account: bool,
pub gravatar_url: Option<String>,
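/// Number of breaches in which this email address was found, according to
/// the HaveIBeenPwned API. `None` when the check was not performed (no API
/// key provided). Note that `Some(0)` is also reported when the API call
/// fails.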
pub haveibeenpwned_breaches_found: Option<u64>,
}

/// Error occured connecting to this email server via SMTP. Right now this
Expand All @@ -40,7 +44,11 @@ pub struct MiscDetails {
pub enum MiscError {}

/// Fetch misc details about the email address, such as whether it's disposable.
pub async fn check_misc(syntax: &SyntaxDetails, cfg_check_gravatar: bool) -> MiscDetails {
pub async fn check_misc(
syntax: &SyntaxDetails,
cfg_check_gravatar: bool,
haveibeenpwned_api_key: Option<String>,
) -> MiscDetails {
let role_accounts: Vec<&str> =
serde_json::from_str(ROLE_ACCOUNTS).expect("roles.json is a valid json. qed.");

Expand All @@ -56,12 +64,20 @@ pub async fn check_misc(syntax: &SyntaxDetails, cfg_check_gravatar: bool) -> Mis
gravatar_url = check_gravatar(address.as_ref()).await;
}

let mut haveibeenpwned_breaches_found: Option<u64> = None;

if haveibeenpwned_api_key.is_some() {
haveibeenpwned_breaches_found =
Some(check_haveibeenpwned(address.as_ref(), haveibeenpwned_api_key).await);
}

MiscDetails {
// mailchecker::is_valid checks also if the syntax is valid. But if
// we're here, it means we're sure the syntax is valid, so is_valid
// actually will only check if it's disposable.
is_disposable: !mailchecker::is_valid(address.as_ref()),
is_role_account: role_accounts.contains(&syntax.username.to_lowercase().as_ref()),
gravatar_url,
haveibeenpwned_breaches_found,
}
}
17 changes: 14 additions & 3 deletions core/src/util/input_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ pub struct CheckEmailInput {
//
// Defaults to false.
pub check_gravatar: bool,
/// Check if the email address is present in the HaveIBeenPwned API.
// If the api_key is filled, the HaveIBeenPwned API is checked.
pub haveibeenpwned_api_key: Option<String>,
/// For Hotmail/Outlook email addresses, use a headless navigator
/// connecting to the password recovery page instead of the SMTP server.
/// This assumes you have a WebDriver compatible process running, then pass
Expand Down Expand Up @@ -152,6 +155,7 @@ impl Default for CheckEmailInput {
gmail_use_api: false,
microsoft365_use_api: false,
check_gravatar: false,
haveibeenpwned_api_key: None,
retries: 2,
}
}
Expand Down Expand Up @@ -282,6 +286,13 @@ impl CheckEmailInput {
self
}

/// Set the HaveIBeenPwned API key to use for the given email. The
/// HaveIBeenPwned check is only performed if the api_key is set; pass
/// `None` to skip it.
pub fn set_haveibeenpwned_api_key(&mut self, api_key: Option<String>) -> &mut CheckEmailInput {
self.haveibeenpwned_api_key = api_key;
self
}

/// Set whether or not to use a headless navigator to navigate to Hotmail's
/// password recovery page to check if an email exists. If set to
/// `Some(<endpoint>)`, this endpoint must point to a WebDriver process,
Expand Down Expand Up @@ -433,20 +444,20 @@ mod tests {
let res = dummy_response_with_message("blacklist");
let actual = serde_json::to_string(&res).unwrap();
// Make sure the `description` is present with IpBlacklisted.
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: blacklist"},"description":"IpBlacklisted"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#;
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null,"haveibeenpwned_breaches_found":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: blacklist"},"description":"IpBlacklisted"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#;
assert_eq!(expected, actual);

let res =
dummy_response_with_message("Client host rejected: cannot find your reverse hostname");
let actual = serde_json::to_string(&res).unwrap();
// Make sure the `description` is present with NeedsRDNs.
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: Client host rejected: cannot find your reverse hostname"},"description":"NeedsRDNS"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#;
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null,"haveibeenpwned_breaches_found":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: Client host rejected: cannot find your reverse hostname"},"description":"NeedsRDNS"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#;
assert_eq!(expected, actual);

let res = dummy_response_with_message("foobar");
let actual = serde_json::to_string(&res).unwrap();
// Make sure the `description` is NOT present.
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: foobar"}},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#;
let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null,"haveibeenpwned_breaches_found":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: foobar"}},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#;
assert_eq!(expected, actual);
}
}

0 comments on commit e9e706b

Please sign in to comment.