diff --git a/src/http/reqwest/async_reqwest.rs b/src/http/reqwest/async_reqwest.rs
index ea87d5f2a..288d119cc 100644
--- a/src/http/reqwest/async_reqwest.rs
+++ b/src/http/reqwest/async_reqwest.rs
@@ -1,24 +1,26 @@
 use reqwest::{Client, Request};
-use reqwest::{Method, Error};
+use reqwest::Method;
+use reqwest::Error as ReqwestError;
 use reqwest::header::HeaderValue;
 use url::{Origin, Url};
 use reqwest::header::USER_AGENT;
 use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
 use crate::parser::{ParseResult, parse_fetched_robots_txt};
 use crate::model::FetchedRobotsTxt;
+use crate::model::{Error, ErrorKind};
 use std::pin::Pin;
 use futures::task::{Context, Poll};
 use futures::Future;
 use futures::future::TryFutureExt;
 use futures::future::ok as future_ok;
 
-type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>;
+type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), ReqwestError>>>;
 
 impl RobotsTxtClient for Client {
-    type Result = RobotsTxtResponse;
+    type Result = Result<RobotsTxtResponse, Error>;
     fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
         let url = format!("{}/robots.txt", origin.unicode_serialization());
-        let url = Url::parse(&url).expect("Unable to parse robots.txt url");
+        let url = Url::parse(&url).map_err(|err| Error {kind: ErrorKind::Url(err)})?;
         let mut request = Request::new(Method::GET, url);
         let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
         let response = self
@@ -29,11 +31,11 @@ impl RobotsTxtClient for Client {
                 return future_ok((response_info, response_text));
             });
         });
-        let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>> = Box::pin(response);
-        return RobotsTxtResponse {
+        let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), ReqwestError>>>> = Box::pin(response);
+        Ok(RobotsTxtResponse {
             origin,
             response,
-        }
+        })
     }
 }
 
@@ -55,7 +57,7 @@ impl RobotsTxtResponse {
 }
 
 impl Future for RobotsTxtResponse {
-    type Output = Result<ParseResult<FetchedRobotsTxt>, Error>;
+    type Output = Result<ParseResult<FetchedRobotsTxt>, ReqwestError>;
 
     fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
         let self_mut = self.get_mut();
@@ -73,4 +75,4 @@
             },
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/http/reqwest/sync_reqwest.rs b/src/http/reqwest/sync_reqwest.rs
index 0365d66db..671cca410 100644
--- a/src/http/reqwest/sync_reqwest.rs
+++ b/src/http/reqwest/sync_reqwest.rs
@@ -1,23 +1,24 @@
 use reqwest::blocking::{Client, Request};
-use reqwest::{Method, Error};
+use reqwest::Method;
 use reqwest::header::HeaderValue;
 use url::{Origin, Url};
 use reqwest::header::USER_AGENT;
 use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
 use crate::parser::{ParseResult, parse_fetched_robots_txt};
 use crate::model::FetchedRobotsTxt;
+use crate::model::{Error, ErrorKind};
 
 impl RobotsTxtClient for Client {
     type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
     fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
         let url = format!("{}/robots.txt", origin.unicode_serialization());
-        let url = Url::parse(&url).expect("Unable to parse robots.txt url");
+        let url = Url::parse(&url).map_err(|err| Error {kind: ErrorKind::Url(err)})?;
         let mut request = Request::new(Method::GET, url);
         let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
-        let response = self.execute(request)?;
+        let response = self.execute(request).map_err(|err| Error {kind: ErrorKind::Http(err)})?;
         let status_code = response.status().as_u16();
-        let text = response.text()?;
+        let text = response.text().map_err(|err| Error {kind: ErrorKind::Http(err)})?;
         let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
         return Ok(robots_txt);
     }
-}
\ No newline at end of file
+}
diff --git a/src/model.rs b/src/model.rs
index 483385d4b..d56bd4a23 100644
--- a/src/model.rs
+++ b/src/model.rs
@@ -14,4 +14,6 @@ pub (crate) use self::fetched_robots_txt::FetchedRobotsTxtContainer;
 mod fetched_robots_txt;
 pub use self::robots_txt::RobotsTxt;
 mod path;
-pub (crate) use self::path::Path;
\ No newline at end of file
+pub (crate) use self::path::Path;
+mod errors;
+pub use self::errors::{Error, ErrorKind};
diff --git a/src/model/errors.rs b/src/model/errors.rs
new file mode 100644
index 000000000..dd631eb53
--- /dev/null
+++ b/src/model/errors.rs
@@ -0,0 +1,23 @@
+use std::fmt;
+
+#[derive(Debug)]
+pub struct Error {
+    pub kind: ErrorKind,
+}
+
+#[derive(Debug)]
+pub enum ErrorKind {
+    Url(url::ParseError),
+    Http(reqwest::Error),
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self.kind {
+            ErrorKind::Url(ref err) => err.fmt(f),
+            ErrorKind::Http(ref err) => err.fmt(f),
+        }
+    }
+}
+
+impl std::error::Error for Error {}
diff --git a/tests/test_reqwest_async.rs b/tests/test_reqwest_async.rs
index a5ecd0806..3701b2b52 100644
--- a/tests/test_reqwest_async.rs
+++ b/tests/test_reqwest_async.rs
@@ -3,14 +3,28 @@ use robotparser::service::RobotsTxtService;
 use reqwest::Client;
 use url::Url;
 use tokio::runtime::Runtime;
+use url::{Host, Origin};
 
 #[test]
 fn test_reqwest_async() {
     let mut runtime = Runtime::new().unwrap();
     let client = Client::new();
     let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
-    let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()));
+    let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()).unwrap());
     let robots_txt = robots_txt_response.unwrap().get_result();
     let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     assert!(robots_txt.can_fetch("*", &fetch_url));
-}
\ No newline at end of file
+    let fetch_url = Url::parse("http://www.python.org/webstats/").unwrap();
+    assert!(!robots_txt.can_fetch("*", &fetch_url));
+}
+
+#[test]
+fn test_reqwest_async_panic_url() {
+    let client = Client::new();
+    let host = Host::Domain("python.org::".into());
+    let origin = Origin::Tuple("https".into(), host, 80);
+    match client.fetch_robots_txt(origin) {
+        Ok(_) => assert!(false),
+        Err(_) => assert!(true)
+    }
+}
diff --git a/tests/test_reqwest_blocking.rs b/tests/test_reqwest_blocking.rs
index 1c38c4eb3..b82681127 100644
--- a/tests/test_reqwest_blocking.rs
+++ b/tests/test_reqwest_blocking.rs
@@ -2,6 +2,7 @@ use robotparser::http::RobotsTxtClient;
 use robotparser::service::RobotsTxtService;
 use reqwest::blocking::Client;
 use url::Url;
+use url::{Host, Origin};
 
 #[test]
 fn test_reqwest_blocking() {
@@ -10,4 +11,17 @@
     let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result();
     let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
     assert!(robots_txt.can_fetch("*", &fetch_url));
+    let fetch_url = Url::parse("https://www.python.org/webstats/").unwrap();
+    assert!(!robots_txt.can_fetch("*", &fetch_url));
+}
+
+#[test]
+fn test_reqwest_blocking_panic_url() {
+    let client = Client::new();
+    let host = Host::Domain("python.org::".into());
+    let origin = Origin::Tuple("https".into(), host, 80);
+    match client.fetch_robots_txt(origin) {
+        Ok(_) => assert!(false),
+        Err(_) => assert!(true)
+    }
 }