Skip to content

Commit d38fcd8

Browse files
authored
Add basic netlify redirects support (#187)
* Add basic netlify redirects support * ignore nonroot redirects * fix tests on windows * fixup tests * restructure docs * fix install-tester workflow * upgrade dist
1 parent a57b956 commit d38fcd8

File tree

4 files changed

+219
-19
lines changed

4 files changed

+219
-19
lines changed

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,41 @@ and `--github-actions` feature.
184184
fairly feature-rich, but was a non-starter due to performance. This applies
185185
to countless other link checkers we tried that are not mentioned here.
186186

187+
## Redirects
188+
189+
Since 0.1.45 `hyperlink` supports reading configured redirects from a file.
190+
191+
At the root of your site, make a file `_redirects`:
192+
193+
```
194+
# lines starting with # are ignored
195+
/old-url.html /new-url.html
196+
197+
# on the next line, trailing data like the 301 status code is ignored
198+
/old-url2.html /new-url2.html 301
199+
200+
# /old-url.html will become a valid link target
201+
# hyperlink will validate that /new-url.html exists.
202+
```
203+
204+
This format is supported by at least Netlify, [Codeberg
205+
pages](https://codeberg.page) and [Grebedoc](https://grebedoc.dev).
206+
207+
References for this format can be found at
208+
[Codeberg](https://docs.codeberg.org/codeberg-pages/redirects/) and
209+
[Netlify](https://docs.netlify.com/manage/routing/redirects/overview/).
210+
211+
The major things missing from the implementation are:
212+
213+
* `hyperlink` completely ignores any status codes or country code conditions.
214+
The only thing it parses is the `from to` pair; the rest of the line is ignored.
215+
216+
* "Splat sources" (`/articles/*`) and "splat targets" (`/posts/:splat`) are
217+
not supported.
218+
219+
* Generally speaking, `hyperlink` does not support "pretty URLs", i.e. one
220+
cannot request `/mypage` and expect `mypage.html` to be loaded.
221+
187222
## Testimonials
188223

189224
> We use Hyperlink to check for dead links on

src/html/mod.rs

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ mod parser;
33
use std::borrow::Cow;
44
use std::fmt;
55
use std::fs;
6-
use std::io::Read;
6+
use std::io::{BufRead, BufReader, Read};
77
use std::path::{Path, PathBuf};
88
use std::str;
99
use std::sync::Arc;
@@ -308,6 +308,41 @@ impl Document {
308308
Href(href.into_bump_str())
309309
}
310310

311+
pub fn extract_links<'b, 'l, P: ParagraphWalker, F>(
312+
&self,
313+
doc_buf: &'b mut DocumentBuffers,
314+
check_anchors: bool,
315+
mut callback: F,
316+
) -> Result<bool, Error>
317+
where
318+
'b: 'l,
319+
F: FnMut(Link<'l, P::Paragraph>),
320+
{
321+
if self.href == "_redirects" {
322+
for link in self.parse_redirects::<P>(doc_buf, check_anchors)? {
323+
callback(link);
324+
}
325+
return Ok(true);
326+
}
327+
328+
if self
329+
.path
330+
.extension()
331+
.and_then(|extension| {
332+
let ext = extension.to_str()?;
333+
Some(ext == "html" || ext == "htm")
334+
})
335+
.unwrap_or(false)
336+
{
337+
for link in self.links_from_html::<P>(doc_buf, check_anchors)? {
338+
callback(link);
339+
}
340+
return Ok(true);
341+
}
342+
343+
Ok(false)
344+
}
345+
311346
pub fn links<'b, 'l, P: ParagraphWalker>(
312347
&self,
313348
doc_buf: &'b mut DocumentBuffers,
@@ -319,6 +354,62 @@ impl Document {
319354
self.links_from_read::<_, P>(doc_buf, fs::File::open(&*self.path)?, check_anchors)
320355
}
321356

357+
fn links_from_html<'b, 'l, P: ParagraphWalker>(
358+
&self,
359+
doc_buf: &'b mut DocumentBuffers,
360+
check_anchors: bool,
361+
) -> Result<impl Iterator<Item = Link<'l, P::Paragraph>>, Error>
362+
where
363+
'b: 'l,
364+
{
365+
self.links_from_read::<_, P>(doc_buf, fs::File::open(&*self.path)?, check_anchors)
366+
}
367+
368+
fn parse_redirects<'b, 'l, P: ParagraphWalker>(
369+
&self,
370+
doc_buf: &'b mut DocumentBuffers,
371+
check_anchors: bool,
372+
) -> Result<impl Iterator<Item = Link<'l, P::Paragraph>>, Error>
373+
where
374+
'b: 'l,
375+
{
376+
let mut link_buf = BumpVec::new_in(&doc_buf.arena);
377+
let file = fs::File::open(&*self.path)?;
378+
let reader = BufReader::new(file);
379+
380+
for line in reader.lines() {
381+
let line = line?;
382+
383+
let trimmed = line.trim();
384+
if trimmed.is_empty() || trimmed.starts_with('#') {
385+
continue;
386+
}
387+
388+
let parts: Vec<&str> = trimmed.split_whitespace().collect();
389+
if parts.len() >= 2 {
390+
let source = parts[0];
391+
let target = parts[1];
392+
393+
let source_str = doc_buf.arena.alloc_str(source);
394+
let target_str = doc_buf.arena.alloc_str(target);
395+
396+
link_buf.push(Link::Defines(DefinedLink {
397+
href: self.join(&doc_buf.arena, check_anchors, source_str),
398+
}));
399+
400+
if !is_external_link(target.as_bytes()) {
401+
link_buf.push(Link::Uses(UsedLink {
402+
href: self.join(&doc_buf.arena, check_anchors, target_str),
403+
path: self.path.clone(),
404+
paragraph: None,
405+
}));
406+
}
407+
}
408+
}
409+
410+
Ok(link_buf.into_iter())
411+
}
412+
322413
fn links_from_read<'b, 'l, R: Read, P: ParagraphWalker>(
323414
&self,
324415
doc_buf: &'b mut DocumentBuffers,

src/main.rs

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -468,26 +468,17 @@ fn extract_html_links<C: LinkCollector<P::Paragraph>, P: ParagraphWalker>(
468468
}));
469469
file_count += 1;
470470

471-
if !document
472-
.path
473-
.extension()
474-
.and_then(|extension| Some(HTML_FILES.contains(&extension.to_str()?)))
475-
.unwrap_or(false)
476-
{
477-
return Ok((doc_buf, collector, documents_count, file_count));
471+
let was_parsed = document
472+
.extract_links::<P, _>(&mut doc_buf, check_anchors, |link| {
473+
collector.ingest(link);
474+
})
475+
.with_context(|| format!("Failed to read file {}", document.path.display()))?;
476+
477+
if was_parsed {
478+
doc_buf.reset();
479+
documents_count += 1;
478480
}
479481

480-
for link in document
481-
.links::<P>(&mut doc_buf, check_anchors)
482-
.with_context(|| format!("Failed to read file {}", document.path.display()))?
483-
{
484-
collector.ingest(link);
485-
}
486-
487-
doc_buf.reset();
488-
489-
documents_count += 1;
490-
491482
Ok((doc_buf, collector, documents_count, file_count))
492483
},
493484
)

tests/cli_snapshots.rs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use assert_fs::prelude::*;
12
use insta_cmd::{assert_cmd_snapshot, get_cargo_bin};
23
use std::process::Command;
34

@@ -98,3 +99,85 @@ fn test_version() {
9899
----- stderr -----
99100
"###);
100101
}
102+
103+
#[test]
104+
fn test_redirects() {
105+
let site = assert_fs::TempDir::new().unwrap();
106+
107+
site.child("_redirects")
108+
.write_str(
109+
"# This is a comment\n\
110+
\n\
111+
/old-page /new-page.html 301\n\
112+
/external https://example.com/page\n\
113+
/broken /missing-page.html\n\
114+
/another /target.html",
115+
)
116+
.unwrap();
117+
118+
site.child("new-page.html").touch().unwrap();
119+
site.child("target.html").touch().unwrap();
120+
121+
site.child("index.html")
122+
.write_str("<a href='/old-page'>link</a>")
123+
.unwrap();
124+
125+
let mut settings = insta::Settings::clone_current();
126+
settings.add_filter(r"[/\\]", "/");
127+
let _guard = settings.bind_to_scope();
128+
129+
assert_cmd_snapshot!(cli().arg(".").current_dir(site.path()), @r###"
130+
success: false
131+
exit_code: 1
132+
----- stdout -----
133+
Reading files
134+
Checking 4 links from 4 files (4 documents)
135+
./_redirects
136+
error: bad link /missing-page.html
137+
138+
Found 1 bad links
139+
140+
----- stderr -----
141+
"###);
142+
143+
site.close().unwrap();
144+
}
145+
146+
#[test]
147+
fn test_redirects_only_at_root() {
148+
let site = assert_fs::TempDir::new().unwrap();
149+
150+
site.child("_redirects")
151+
.write_str("/old-page /new-page.html")
152+
.unwrap();
153+
154+
site.child("subdir/_redirects")
155+
.write_str("/sub-old /sub-new.html")
156+
.unwrap();
157+
158+
site.child("new-page.html").touch().unwrap();
159+
160+
site.child("index.html")
161+
.write_str("<a href='/old-page'>link to old</a><a href='/sub-old'>link to sub</a>")
162+
.unwrap();
163+
164+
let mut settings = insta::Settings::clone_current();
165+
settings.add_filter(r"[/\\]", "/");
166+
let _guard = settings.bind_to_scope();
167+
168+
assert_cmd_snapshot!(cli().arg(".").current_dir(site.path()), @r###"
169+
success: false
170+
exit_code: 1
171+
----- stdout -----
172+
Reading files
173+
Checking 3 links from 4 files (3 documents)
174+
./index.html
175+
error: bad link /sub-old
176+
177+
Found 1 bad links
178+
179+
----- stderr -----
180+
"###);
181+
182+
site.close().unwrap();
183+
}

0 commit comments

Comments
 (0)