Skip to content

Commit

Permalink
Add support for custom headers when checking the initial inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
mre committed Nov 7, 2024
1 parent a28c92b commit d4ede50
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 29 deletions.
3 changes: 3 additions & 0 deletions examples/collect_links/collect_links.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use http::HeaderMap;
use lychee_lib::{Collector, Input, InputSource, Result};
use reqwest::Url;
use std::path::PathBuf;
Expand All @@ -13,11 +14,13 @@ async fn main() -> Result<()> {
)),
file_type_hint: None,
excluded_paths: None,
headers: HeaderMap::new(),
},
Input {
source: InputSource::FsPath(PathBuf::from("fixtures/TEST.md")),
file_type_hint: None,
excluded_paths: None,
headers: HeaderMap::new(),
},
];

Expand Down
13 changes: 11 additions & 2 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::archive::Archive;
use crate::parse::parse_base;
use crate::parse::{parse_base, parse_headers};
use crate::verbosity::Verbosity;
use anyhow::{anyhow, Context, Error, Result};
use clap::builder::PossibleValuesParser;
Expand Down Expand Up @@ -195,9 +195,18 @@ impl LycheeOptions {
} else {
Some(self.config.exclude_path.clone())
};
let headers = parse_headers(&self.config.header)?;
self.raw_inputs
.iter()
.map(|s| Input::new(s, None, self.config.glob_ignore_case, excluded.clone()))
.map(|s| {
Input::new(
s,
None,
self.config.glob_ignore_case,
excluded.clone(),
headers.clone(),
)
})
.collect::<Result<_, _>>()
.context("Cannot parse inputs from arguments")
}
Expand Down
28 changes: 24 additions & 4 deletions lychee-lib/src/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ impl Collector {
mod tests {
use std::{collections::HashSet, convert::TryFrom, fs::File, io::Write};

use http::StatusCode;
use http::{HeaderMap, StatusCode};
use reqwest::Url;

use super::*;
Expand Down Expand Up @@ -173,7 +173,13 @@ mod tests {
// Treat as plaintext file (no extension)
let file_path = temp_dir.path().join("README");
let _file = File::create(&file_path).unwrap();
let input = Input::new(&file_path.as_path().display().to_string(), None, true, None)?;
let input = Input::new(
&file_path.as_path().display().to_string(),
None,
true,
None,
HeaderMap::new(),
)?;
let contents: Vec<_> = input
.get_contents(true, true, true)
.collect::<Vec<_>>()
Expand All @@ -186,7 +192,7 @@ mod tests {

#[tokio::test]
async fn test_url_without_extension_is_html() -> Result<()> {
let input = Input::new("https://example.com/", None, true, None)?;
let input = Input::new("https://example.com/", None, true, None, HeaderMap::new())?;
let contents: Vec<_> = input
.get_contents(true, true, true)
.collect::<Vec<_>>()
Expand Down Expand Up @@ -221,6 +227,7 @@ mod tests {
source: InputSource::String(TEST_STRING.to_owned()),
file_type_hint: None,
excluded_paths: None,
headers: HeaderMap::new(),
},
Input {
source: InputSource::RemoteUrl(Box::new(
Expand All @@ -230,11 +237,13 @@ mod tests {
)),
file_type_hint: None,
excluded_paths: None,
headers: HeaderMap::new(),
},
Input {
source: InputSource::FsPath(file_path),
file_type_hint: None,
excluded_paths: None,
headers: HeaderMap::new(),
},
Input {
source: InputSource::FsGlob {
Expand All @@ -243,6 +252,7 @@ mod tests {
},
file_type_hint: None,
excluded_paths: None,
headers: HeaderMap::new(),
},
];

Expand All @@ -267,7 +277,8 @@ mod tests {
let input = Input {
source: InputSource::String("This is [a test](https://endler.dev). This is a relative link test [Relative Link Test](relative_link)".to_string()),
file_type_hint: Some(FileType::Markdown),
excluded_paths: None,
excluded_paths: None,
headers: HeaderMap::new(),
};
let links = collect(vec![input], Some(base)).await;

Expand All @@ -294,6 +305,7 @@ mod tests {
),
file_type_hint: Some(FileType::Html),
excluded_paths: None,
headers: HeaderMap::new(),
};
let links = collect(vec![input], Some(base)).await;

Expand Down Expand Up @@ -323,6 +335,7 @@ mod tests {
),
file_type_hint: Some(FileType::Html),
excluded_paths: None,
headers: HeaderMap::new(),
};
let links = collect(vec![input], Some(base)).await;

Expand All @@ -349,6 +362,7 @@ mod tests {
),
file_type_hint: Some(FileType::Markdown),
excluded_paths: None,
headers: HeaderMap::new(),
};

let links = collect(vec![input], Some(base)).await;
Expand All @@ -372,6 +386,7 @@ mod tests {
source: InputSource::String(input),
file_type_hint: Some(FileType::Html),
excluded_paths: None,
headers: HeaderMap::new(),
};
let links = collect(vec![input], Some(base)).await;

Expand Down Expand Up @@ -404,6 +419,7 @@ mod tests {
source: InputSource::RemoteUrl(Box::new(server_uri.clone())),
file_type_hint: None,
excluded_paths: None,
headers: HeaderMap::new(),
};

let links = collect(vec![input], None).await;
Expand All @@ -424,6 +440,7 @@ mod tests {
),
file_type_hint: None,
excluded_paths: None,
headers: HeaderMap::new(),
};
let links = collect(vec![input], None).await;

Expand Down Expand Up @@ -454,6 +471,7 @@ mod tests {
)),
file_type_hint: Some(FileType::Html),
excluded_paths: None,
headers: HeaderMap::new(),
},
Input {
source: InputSource::RemoteUrl(Box::new(
Expand All @@ -465,6 +483,7 @@ mod tests {
)),
file_type_hint: Some(FileType::Html),
excluded_paths: None,
headers: HeaderMap::new(),
},
];

Expand Down Expand Up @@ -500,6 +519,7 @@ mod tests {
),
file_type_hint: Some(FileType::Html),
excluded_paths: None,
headers: HeaderMap::new(),
};

let links = collect(vec![input], Some(base)).await;
Expand Down
59 changes: 36 additions & 23 deletions lychee-lib/src/types/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::{utils, ErrorKind, Result};
use async_stream::try_stream;
use futures::stream::Stream;
use glob::glob_with;
use http::HeaderMap;
use ignore::WalkBuilder;
use reqwest::Url;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -105,14 +106,16 @@ impl Display for InputSource {
}

/// Lychee Input with optional file hint for parsing
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
// NOTE(review): the derive below omits `Hash` now that `headers` is part of the
// struct — presumably because `HeaderMap` does not implement `Hash`. Confirm
// that no caller relies on hashing `Input` (e.g. as a `HashSet`/`HashMap` key).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Input {
/// Origin of input
pub source: InputSource,
/// Hint to indicate which extractor to use
pub file_type_hint: Option<FileType>,
/// Excluded paths that will be skipped when reading content
pub excluded_paths: Option<Vec<PathBuf>>,
/// Custom headers to be used when fetching remote URLs
// NOTE(review): spelled `reqwest::header::HeaderMap` here while callers pass
// `http::HeaderMap::new()`; this looks like the same re-exported type — confirm
// and consider using one path consistently.
pub headers: reqwest::header::HeaderMap,
}

impl Input {
Expand All @@ -129,6 +132,7 @@ impl Input {
file_type_hint: Option<FileType>,
glob_ignore_case: bool,
excluded_paths: Option<Vec<PathBuf>>,
headers: reqwest::header::HeaderMap,
) -> Result<Self> {
let source = if value == STDIN {
InputSource::Stdin
Expand Down Expand Up @@ -194,9 +198,20 @@ impl Input {
source,
file_type_hint,
excluded_paths,
headers,
})
}

/// Build an [`Input`] from a raw value, using defaults for every other option.
///
/// This delegates to [`Input::new`] with no file type hint, `glob_ignore_case`
/// set to `false`, no excluded paths, and an empty header map.
///
/// # Errors
///
/// Returns an error if the input does not exist (i.e. invalid path)
/// and the input cannot be parsed as a URL.
pub fn from_value(value: &str) -> Result<Self> {
    // Spell out each default so the positional arguments to `new` stay readable.
    let file_type_hint = None;
    let glob_ignore_case = false;
    let excluded_paths = None;
    let headers = HeaderMap::new();

    Self::new(
        value,
        file_type_hint,
        glob_ignore_case,
        excluded_paths,
        headers,
    )
}

/// Retrieve the contents from the input
///
/// # Errors
Expand Down Expand Up @@ -424,6 +439,8 @@ fn is_excluded_path(excluded_paths: &[PathBuf], path: &PathBuf) -> bool {

#[cfg(test)]
mod tests {
use http::HeaderMap;

use super::*;

#[test]
Expand All @@ -434,14 +451,15 @@ mod tests {
assert!(path.exists());
assert!(path.is_relative());

let input = Input::new(test_file, None, false, None);
let input = Input::new(test_file, None, false, None, HeaderMap::new());
assert!(input.is_ok());
assert!(matches!(
input,
Ok(Input {
source: InputSource::FsPath(PathBuf { .. }),
file_type_hint: None,
excluded_paths: None
excluded_paths: None,
headers: _,
})
));
}
Expand All @@ -454,7 +472,7 @@ mod tests {
assert!(!path.exists());
assert!(path.is_relative());

let input = Input::new(test_file, None, false, None);
let input = Input::from_value(test_file);
assert!(input.is_err());
assert!(matches!(input, Err(ErrorKind::InvalidFile(PathBuf { .. }))));
}
Expand Down Expand Up @@ -497,7 +515,7 @@ mod tests {

#[test]
fn test_url_without_scheme() {
let input = Input::new("example.com", None, false, None);
let input = Input::from_value("example.com");
assert_eq!(
input.unwrap().source.to_string(),
String::from("http://example.com/")
Expand All @@ -508,7 +526,7 @@ mod tests {
#[cfg(windows)]
#[test]
fn test_windows_style_filepath_not_existing() {
let input = Input::new("C:\\example\\project\\here", None, false, None);
let input = Input::from_value("C:\\example\\project\\here");
assert!(input.is_err());
let input = input.unwrap_err();

Expand All @@ -528,7 +546,7 @@ mod tests {
let dir = temp_dir();
let file = NamedTempFile::new_in(dir).unwrap();
let path = file.path();
let input = Input::new(path.to_str().unwrap(), None, false, None).unwrap();
let input = Input::from_value(path.to_str().unwrap()).unwrap();

match input.source {
InputSource::FsPath(_) => (),
Expand All @@ -540,33 +558,28 @@ mod tests {
fn test_url_scheme_check_succeeding() {
// Valid http and https URLs
assert!(matches!(
Input::new("http://example.com", None, false, None),
Input::from_value("http://example.com"),
Ok(Input {
source: InputSource::RemoteUrl(_),
..
})
));
assert!(matches!(
Input::new("https://example.com", None, false, None),
Input::from_value("https://example.com"),
Ok(Input {
source: InputSource::RemoteUrl(_),
..
})
));
assert!(matches!(
Input::new(
"http://subdomain.example.com/path?query=value",
None,
false,
None
),
Input::from_value("http://subdomain.example.com/path?query=value",),
Ok(Input {
source: InputSource::RemoteUrl(_),
..
})
));
assert!(matches!(
Input::new("https://example.com:8080", None, false, None),
Input::from_value("https://example.com:8080"),
Ok(Input {
source: InputSource::RemoteUrl(_),
..
Expand All @@ -578,19 +591,19 @@ mod tests {
fn test_url_scheme_check_failing() {
// Invalid schemes
assert!(matches!(
Input::new("ftp://example.com", None, false, None),
Input::from_value("ftp://example.com"),
Err(ErrorKind::InvalidFile(_))
));
assert!(matches!(
Input::new("httpx://example.com", None, false, None),
Input::from_value("httpx://example.com"),
Err(ErrorKind::InvalidFile(_))
));
assert!(matches!(
Input::new("file:///path/to/file", None, false, None),
Input::from_value("file:///path/to/file"),
Err(ErrorKind::InvalidFile(_))
));
assert!(matches!(
Input::new("mailto:user@example.com", None, false, None),
Input::from_value("mailto:user@example.com"),
Err(ErrorKind::InvalidFile(_))
));
}
Expand All @@ -599,19 +612,19 @@ mod tests {
fn test_non_url_inputs() {
// Non-URL inputs
assert!(matches!(
Input::new("./local/path", None, false, None),
Input::from_value("./local/path"),
Err(ErrorKind::InvalidFile(_))
));
assert!(matches!(
Input::new("*.md", None, false, None),
Input::from_value("*.md"),
Ok(Input {
source: InputSource::FsGlob { .. },
..
})
));
// Assuming the current directory exists
assert!(matches!(
Input::new(".", None, false, None),
Input::from_value("."),
Ok(Input {
source: InputSource::FsPath(_),
..
Expand Down

0 comments on commit d4ede50

Please sign in to comment.