Skip to content

Commit

Permalink
Merge branch 'master' into chalin-ignoreurl-regex-for-internal-too-20…
Browse files Browse the repository at this point in the history
…21-09-17
  • Loading branch information
wjdp committed Mar 28, 2022
2 parents 03c4f89 + 54089cb commit 095de9a
Show file tree
Hide file tree
Showing 10 changed files with 45 additions and 23 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ dist/

# JetBrains config
.idea/
*.iml
1 change: 0 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 h1:bWDMxwH3px2JBh
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
github.com/fatih/color v1.10.0 h1:s36xzo75JdqLaaWoiEHk767eHiwo0598uUxyfiPkDsg=
github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM=
github.com/golangplus/bytes v0.0.0-20160111154220-45c989fe5450 h1:7xqw01UYS+KCI25bMrPxwNYkSns2Db1ziQPpVq99FpE=
github.com/golangplus/bytes v0.0.0-20160111154220-45c989fe5450/go.mod h1:Bk6SMAONeMXrxql8uvOKuAZSu8aM5RUGv+1C6IJaEho=
github.com/golangplus/bytes v1.0.0 h1:YQKBijBVMsBxIiXT4IEhlKR2zHohjEqPole4umyDX+c=
github.com/golangplus/bytes v1.0.0/go.mod h1:AdRaCFwmc/00ZzELMWb01soso6W1R/++O1XL80yAn+A=
Expand Down
7 changes: 5 additions & 2 deletions htmldoc/document.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@ package htmldoc

import (
"fmt"
"github.com/wjdp/htmltest/output"
"golang.org/x/net/html"
"os"
"path"
"sync"

"github.com/wjdp/htmltest/output"
"golang.org/x/net/html"
)

// Document struct, representation of a document within the tested site
type Document struct {
FilePath string // Relative to the shell session
SitePath string // Relative to the site root
BasePath string // Base for relative links
IgnoreTest bool // Ignore this Document for testing.
htmlMutex *sync.Mutex // Controls access to htmlNode
htmlNode *html.Node // Parsed output
hashMap map[string]*html.Node // Map of valid id/names of nodes
Expand Down Expand Up @@ -43,6 +45,7 @@ func (doc *Document) Init() {
// already been done. Thread safe. Either called when the document is tested
// or when another document needs data from this one.
func (doc *Document) Parse() {

// Only one routine may parse the doc
doc.htmlMutex.Lock()
defer doc.htmlMutex.Unlock()
Expand Down
15 changes: 6 additions & 9 deletions htmldoc/document_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
package htmldoc

import (
"github.com/wjdp/htmltest/output"
"os"
"path"
"regexp"

"github.com/wjdp/htmltest/output"
)

// DocumentStore struct, store of Documents including Document discovery
Expand Down Expand Up @@ -55,11 +56,6 @@ func (dS *DocumentStore) isDirIgnored(dir string) bool {

// Recursive function to discover documents by walking the file tree
func (dS *DocumentStore) discoverRecurse(dPath string) {
// Recurse over relative path dPath, saves found documents to dS
if dS.isDirIgnored(dPath) {
return
}

// Open directory to scan
f, err := os.Open(path.Join(dS.BasePath, dPath))
output.CheckErrorPanic(err)
Expand All @@ -83,9 +79,10 @@ func (dS *DocumentStore) discoverRecurse(dPath string) {
} else if path.Ext(fileinfo.Name()) == dS.DocumentExtension {
// If a file, create and save document
newDoc := &Document{
FilePath: path.Join(dS.BasePath, fPath),
SitePath: fPath,
BasePath: dPath,
FilePath: path.Join(dS.BasePath, fPath),
SitePath: fPath,
BasePath: dPath,
IgnoreTest: dS.isDirIgnored(dPath),
}
newDoc.Init()
dS.AddDocument(newDoc)
Expand Down
5 changes: 3 additions & 2 deletions htmldoc/document_store_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package htmldoc

import (
"github.com/daviddengcn/go-assert"
"testing"

"github.com/daviddengcn/go-assert"
)

func TestDocumentStoreDiscover(t *testing.T) {
Expand All @@ -25,7 +26,7 @@ func TestDocumentStoreIgnorePatterns(t *testing.T) {
dS.IgnorePatterns = []interface{}{"^lib/"}
dS.Discover()
// Fixtures dir has seven documents in various folders, (one ignored in lib)
assert.Equals(t, "document count", len(dS.Documents), 5)
assert.Equals(t, "document count", len(dS.Documents), 6)
}

func TestDocumentStoreDocumentExists(t *testing.T) {
Expand Down
4 changes: 3 additions & 1 deletion htmldoc/reference.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
package htmldoc

import (
"golang.org/x/net/html"
"net/url"
"path"
"strings"

"golang.org/x/net/html"
)

// Reference struct, representation of the link between a document and a
Expand All @@ -19,6 +20,7 @@ type Reference struct {
// NewReference : Create a new reference given a document, node and path.
// Generates the URL object.
func NewReference(document *Document, node *html.Node, path string) (*Reference, error) {

// Clean path
path = strings.TrimLeftFunc(path, invalidPrePostRune)
path = strings.TrimRightFunc(path, invalidPrePostRune)
Expand Down
8 changes: 5 additions & 3 deletions htmltest/check-link.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ func (hT *HTMLTest) checkInternal(ref *htmldoc.Reference) {

urlStr := ref.URLString()

// Does this internal url match either a standard URL ignore rule or internal
// url ignore rule?
if hT.opts.isInternalURLIgnored(urlStr) || hT.opts.isURLIgnored(urlStr) {
return
}
Expand Down Expand Up @@ -320,7 +322,6 @@ func (hT *HTMLTest) checkInternalHash(ref *htmldoc.Reference) {
return
}

// var refDoc *htmldoc.Document
if len(ref.URL.Fragment) == 0 {
hT.issueStore.AddIssue(issues.Issue{
Level: issues.LevelError,
Expand All @@ -331,8 +332,9 @@ func (hT *HTMLTest) checkInternalHash(ref *htmldoc.Reference) {

if len(ref.URL.Path) > 0 {
// internal
refDoc, _ := hT.documentStore.ResolveRef(ref)
if !refDoc.IsHashValid(ref.URL.Fragment) {
refDoc, ok := hT.documentStore.ResolveRef(ref)

if !ok || !refDoc.IsHashValid(ref.URL.Fragment) {
hT.issueStore.AddIssue(issues.Issue{
Level: issues.LevelError,
Message: "hash does not exist",
Expand Down
7 changes: 7 additions & 0 deletions htmltest/check-link_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ func TestAnchorExternalBrokenOptionHrefIP(t *testing.T) {

func TestAnchorExternalHrefIPTimeout(t *testing.T) {
// fails for broken IP address links
tSkipShortExternal(t)
hT := tTestFileOpts("fixtures/links/ip_timeout.html",
map[string]interface{}{"ExternalTimeout": 1})
tExpectIssueCount(t, hT, 1)
Expand Down Expand Up @@ -205,6 +206,7 @@ func TestAnchorExternalBrokenOptionHTTPSInvalid(t *testing.T) {
func TestAnchorExternalHTTPSMissingChain(t *testing.T) {
// should support https aia
// see issue #130
tSkipShortExternal(t)
hT := tTestFileOpts("fixtures/links/https-incomplete-chain.html",
map[string]interface{}{"VCREnable": false})
tExpectIssue(t, hT, "incomplete certificate chain", 1)
Expand Down Expand Up @@ -285,12 +287,14 @@ func TestAnchorExternalInvalidBrackets(t *testing.T) {

// TestAnchorExternalQueryStringDefault runs the query_strings fixture with
// default options and expects no issues to be reported.
func TestAnchorExternalQueryStringDefault(t *testing.T) {
	// passes when ignoring from default list of query string exempt URLs
	// NOTE(review): tSkipShortExternal presumably skips this test in short
	// mode because it reaches external hosts — confirm against the helper.
	tSkipShortExternal(t)

	const fixture = "fixtures/links/query_strings.html"
	tExpectIssueCount(t, tTestFile(fixture), 0)
}

func TestAnchorExternalQueryStripQueryExcludesEmpty(t *testing.T) {
// fails when StripQueryExcludes is blank and the URL doesn't like query string hits
tSkipShortExternal(t)
hT := tTestFileOpts("fixtures/links/query_strings.html",
map[string]interface{}{"StripQueryExcludes": []interface{}{}})
tExpectIssueCount(t, hT, 1)
Expand All @@ -299,6 +303,7 @@ func TestAnchorExternalQueryStripQueryExcludesEmpty(t *testing.T) {

func TestAnchorExternalQueryStringStripQueryExcludesDiffers(t *testing.T) {
// fails when StripQueryExcludes does not include URL and URL doesn't like query string hits
tSkipShortExternal(t)
hT := tTestFileOpts("fixtures/links/query_strings.html",
map[string]interface{}{"StripQueryExcludes": []interface{}{"example.com", "test.invalid"}})
tExpectIssueCount(t, hT, 1)
Expand Down Expand Up @@ -719,12 +724,14 @@ func TestAnchorBlankHTML4(t *testing.T) {
}

// TestSelfSignedLink runs the selfSignedLink fixture with IgnoreSSLVerify
// disabled and expects exactly one issue to be reported.
func TestSelfSignedLink(t *testing.T) {
	// NOTE(review): tSkipShortExternal presumably skips this test in short
	// mode because it reaches external hosts — confirm against the helper.
	tSkipShortExternal(t)

	opts := map[string]interface{}{"IgnoreSSLVerify": false}
	hT := tTestFileOpts("fixtures/links/selfSignedLink.html", opts)
	tExpectIssueCount(t, hT, 1)
}

func TestSelfSignedLinkIgnoreSSLVerify(t *testing.T) {
tSkipShortExternal(t)
hT := tTestFileOpts("fixtures/links/selfSignedLink.html",
map[string]interface{}{"IgnoreSSLVerify": true})
tExpectIssueCount(t, hT, 0)
Expand Down
19 changes: 14 additions & 5 deletions htmltest/htmltest.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,18 @@ import (
"crypto/tls"
"errors"
"fmt"
"github.com/wjdp/htmltest/htmldoc"
"github.com/wjdp/htmltest/issues"
"github.com/wjdp/htmltest/output"
"github.com/wjdp/htmltest/refcache"
"gopkg.in/seborama/govcr.v2"
"net/http"
"os"
"path"
"strings"
"sync"
"time"

"github.com/wjdp/htmltest/htmldoc"
"github.com/wjdp/htmltest/issues"
"github.com/wjdp/htmltest/output"
"github.com/wjdp/htmltest/refcache"
"gopkg.in/seborama/govcr.v2"
)

// Base path for VCR cassettes, relative to this package
Expand Down Expand Up @@ -186,6 +187,14 @@ func (hT *HTMLTest) testDocuments() {
}

func (hT *HTMLTest) testDocument(document *htmldoc.Document) {
if document.IgnoreTest {
hT.issueStore.AddIssue(issues.Issue{
Level: issues.LevelDebug,
Message: "ignored " + document.SitePath,
})
return
}

hT.issueStore.AddIssue(issues.Issue{
Level: issues.LevelDebug,
Message: "testDocument on " + document.SitePath,
Expand Down
1 change: 1 addition & 0 deletions htmltest/htmltest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ func TestFileExtensionOption(t *testing.T) {
}

func TestCacheIntegration(t *testing.T) {
tSkipShortExternal(t)
tTestFileOpts("fixtures/links/https-valid.html",
map[string]interface{}{"EnableCache": true})
hT2 := tTestFileOpts("fixtures/links/https-valid.html",
Expand Down

0 comments on commit 095de9a

Please sign in to comment.