@@ -3,7 +3,7 @@ mod parser;
33use std:: borrow:: Cow ;
44use std:: fmt;
55use std:: fs;
6- use std:: io:: Read ;
6+ use std:: io:: { BufRead , BufReader , Read } ;
77use std:: path:: { Path , PathBuf } ;
88use std:: str;
99use std:: sync:: Arc ;
@@ -308,6 +308,41 @@ impl Document {
308308 Href ( href. into_bump_str ( ) )
309309 }
310310
311+ pub fn extract_links < ' b , ' l , P : ParagraphWalker , F > (
312+ & self ,
313+ doc_buf : & ' b mut DocumentBuffers ,
314+ check_anchors : bool ,
315+ mut callback : F ,
316+ ) -> Result < bool , Error >
317+ where
318+ ' b : ' l ,
319+ F : FnMut ( Link < ' l , P :: Paragraph > ) ,
320+ {
321+ if self . href == "_redirects" {
322+ for link in self . parse_redirects :: < P > ( doc_buf, check_anchors) ? {
323+ callback ( link) ;
324+ }
325+ return Ok ( true ) ;
326+ }
327+
328+ if self
329+ . path
330+ . extension ( )
331+ . and_then ( |extension| {
332+ let ext = extension. to_str ( ) ?;
333+ Some ( ext == "html" || ext == "htm" )
334+ } )
335+ . unwrap_or ( false )
336+ {
337+ for link in self . links_from_html :: < P > ( doc_buf, check_anchors) ? {
338+ callback ( link) ;
339+ }
340+ return Ok ( true ) ;
341+ }
342+
343+ Ok ( false )
344+ }
345+
311346 pub fn links < ' b , ' l , P : ParagraphWalker > (
312347 & self ,
313348 doc_buf : & ' b mut DocumentBuffers ,
@@ -319,6 +354,62 @@ impl Document {
319354 self . links_from_read :: < _ , P > ( doc_buf, fs:: File :: open ( & * self . path ) ?, check_anchors)
320355 }
321356
357+ fn links_from_html < ' b , ' l , P : ParagraphWalker > (
358+ & self ,
359+ doc_buf : & ' b mut DocumentBuffers ,
360+ check_anchors : bool ,
361+ ) -> Result < impl Iterator < Item = Link < ' l , P :: Paragraph > > , Error >
362+ where
363+ ' b : ' l ,
364+ {
365+ self . links_from_read :: < _ , P > ( doc_buf, fs:: File :: open ( & * self . path ) ?, check_anchors)
366+ }
367+
368+ fn parse_redirects < ' b , ' l , P : ParagraphWalker > (
369+ & self ,
370+ doc_buf : & ' b mut DocumentBuffers ,
371+ check_anchors : bool ,
372+ ) -> Result < impl Iterator < Item = Link < ' l , P :: Paragraph > > , Error >
373+ where
374+ ' b : ' l ,
375+ {
376+ let mut link_buf = BumpVec :: new_in ( & doc_buf. arena ) ;
377+ let file = fs:: File :: open ( & * self . path ) ?;
378+ let reader = BufReader :: new ( file) ;
379+
380+ for line in reader. lines ( ) {
381+ let line = line?;
382+
383+ let trimmed = line. trim ( ) ;
384+ if trimmed. is_empty ( ) || trimmed. starts_with ( '#' ) {
385+ continue ;
386+ }
387+
388+ let parts: Vec < & str > = trimmed. split_whitespace ( ) . collect ( ) ;
389+ if parts. len ( ) >= 2 {
390+ let source = parts[ 0 ] ;
391+ let target = parts[ 1 ] ;
392+
393+ let source_str = doc_buf. arena . alloc_str ( source) ;
394+ let target_str = doc_buf. arena . alloc_str ( target) ;
395+
396+ link_buf. push ( Link :: Defines ( DefinedLink {
397+ href : self . join ( & doc_buf. arena , check_anchors, source_str) ,
398+ } ) ) ;
399+
400+ if !is_external_link ( target. as_bytes ( ) ) {
401+ link_buf. push ( Link :: Uses ( UsedLink {
402+ href : self . join ( & doc_buf. arena , check_anchors, target_str) ,
403+ path : self . path . clone ( ) ,
404+ paragraph : None ,
405+ } ) ) ;
406+ }
407+ }
408+ }
409+
410+ Ok ( link_buf. into_iter ( ) )
411+ }
412+
322413 fn links_from_read < ' b , ' l , R : Read , P : ParagraphWalker > (
323414 & self ,
324415 doc_buf : & ' b mut DocumentBuffers ,
0 commit comments