Skip to content

Commit

Permalink
fix up
Browse files Browse the repository at this point in the history
  • Loading branch information
twiny committed Jul 11, 2022
1 parent 99bd177 commit 093db74
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 10 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## WBot - a web crawler
## WBot

A configurable, thread-safe web crawler that provides a minimal interface for crawling and downloading web pages.

Expand Down
10 changes: 6 additions & 4 deletions filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,22 @@ func newFilter(allowed, disallowed []string) *filter {

// Allow
func (f *filter) Allow(l *url.URL) bool {
raw := l.String()

if badExtensions.MatchString(l.Path) {
return false
}

// disallowed
for _, r := range f.disallowed {
if r.MatchString(l.String()) {
for _, d := range f.disallowed {
if d.MatchString(raw) {
return false
}
}

// allowed
for _, p := range f.allowed {
if !p.MatchString(l.String()) {
for _, a := range f.allowed {
if !a.MatchString(raw) {
return false
}
}
Expand Down
5 changes: 0 additions & 5 deletions wbot.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,6 @@ func (wb *WBot) Crawl(link string) error {
return err
}

// check filter
if !wb.filter.Allow(req.URL) {
return fmt.Errorf("not allowed")
}

// rate limit
wb.limit.take(req.URL)

Expand Down

0 comments on commit 093db74

Please sign in to comment.