Skip to content

Commit

Permalink
Merge pull request #307 from openzim/zimcheck_protocol_relative_external_urls
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr committed Jul 5, 2022
2 parents 92fc9c7 + 3c5c32e commit 89b0ae0
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 9 deletions.
8 changes: 6 additions & 2 deletions src/tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -454,8 +454,12 @@ void asciitolower(std::string& s)
UriKind html_link::detectUriKind(const std::string& input_string)
{
const auto k = input_string.find_first_of(":/?#");
if ( k == std::string::npos || input_string[k] != ':' )
return UriKind::OTHER;
if ( k == std::string::npos || input_string[k] != ':' ) {
if ( k == 0 && input_string.substr(0, 2) == "//" )
return UriKind::PROTOCOL_RELATIVE;
else
return UriKind::OTHER;
}

if ( k + 2 < input_string.size()
&& input_string[k+1] == '/'
Expand Down
1 change: 1 addition & 0 deletions src/tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ enum class UriKind : int
URN, // urn:nbn:de:bsz:24-digibib-bsz3530416370

GENERIC_URI, // Generic URI with scheme and authority: <scheme>://.....
PROTOCOL_RELATIVE, // Protocol-relative URL: //<host>/<path>/<to>/<resource>

OTHER // not a valid URI (though it can be a valid relative
// or absolute URL)
Expand Down
Binary file modified test/data/zimfiles/bad_checksum.zim
Binary file not shown.
8 changes: 5 additions & 3 deletions test/data/zimfiles/create_test_zimfiles
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ make__good__zim()
--threads 1 \
--no-uuid \
-w main.html \
-f favicon.png \
-I favicon.png \
-l en \
-t "Test ZIM file" \
-d "N/A" \
Expand Down Expand Up @@ -68,7 +68,9 @@ make__poor__zim()
mv favicon.png image.png
sed -i -e 's!favicon.png!image.png!' main.html
sed -e 's!A/article1.html!!' main.html > empty_link.html
sed -e 's!I/image.png!http://a.io/pic.png!' main.html > external_link.html
sed -e 's!I/image\.png!http://a.io/pic.png!' main.html > external_image_http.html
sed -e 's!I/image\.png!https://a.io/pic.png!' main.html > external_image_https.html
sed -e 's!I/image\.png!//a.io/pic.png!' main.html > external_image_protocol_relative.html
sed -e 's/article1/non_existent/' main.html > dangling_link.html
sed -e 's!A/article1!../../oops!' main.html > outofbounds_link.html
command_set="sed -e '6 a <meta http-equiv=refresh content=\"0;URL=redirect_loop.html\">' main.html"
Expand All @@ -84,7 +86,7 @@ make__poor__zim()
--threads 1 \
--no-uuid \
-w "" \
-f "" \
-I "" \
-l en \
-t "" \
-d "" \
Expand Down
Binary file modified test/data/zimfiles/good.zim
Binary file not shown.
Binary file modified test/data/zimfiles/poor.zim
Binary file not shown.
17 changes: 17 additions & 0 deletions test/data/zimfiles/small_zimfile_data/main.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,27 @@
<h1>Main page</h1>

<img src="I/favicon.png">
<h2>Internal links</h2>
<ul>
<li>
<a href="A/article1.html">Article 1</a>
</li>
</ul>
<h2>External links</h2>
<ul>
<li>
<a href="http://openzim.org">HTTP link</a>
</li>
<li>
<a href="https://openzim.org">HTTPS link</a>
</li>
<li>
<a href="ftp://ftp.openzim.org">FTP link</a>
</li>
<li>
<a href="//openzim.org">Protocol relative link</a>
</li>
</ul>
<img src="">
</body>
</html>
3 changes: 3 additions & 0 deletions test/tools-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ TEST(tools, uriKind)
EXPECT_EQ(UriKind::GENERIC_URI, uriKind("file:///etc/passwd"));
EXPECT_EQ(UriKind::GENERIC_URI, uriKind("ftp://download.kiwix.org/zim/"));

EXPECT_EQ(UriKind::PROTOCOL_RELATIVE, uriKind("//example.com"));

EXPECT_EQ(UriKind::MAILTO, uriKind("mailto:someone@example.com"));
EXPECT_EQ(UriKind::MAILTO, uriKind("MAILTO:someone@example.com"));

Expand Down Expand Up @@ -212,6 +214,7 @@ TEST(tools, uriKind)
EXPECT_EQ(UriKind::OTHER, uriKind("showlocation.cgi?geo:12.34,56.78"));
EXPECT_EQ(UriKind::OTHER, uriKind("/xyz/javascript:console.log('hello, world!')"));

EXPECT_EQ(UriKind::OTHER, uriKind("/"));
EXPECT_EQ(UriKind::OTHER, uriKind("/api/data:text/plain;charset=UTF-8,qwerty"));
EXPECT_EQ(UriKind::OTHER, uriKind("../img/logo.png"));
EXPECT_EQ(UriKind::OTHER, uriKind("style.css"));
Expand Down
26 changes: 22 additions & 4 deletions test/zimcheck-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,9 @@ TEST(zimcheck, external_url_check_poorzimfile)
"[INFO] Zimcheck version is " VERSION "\n"
"[INFO] Verifying Articles' content..." "\n"
"[ERROR] Invalid external links found:" "\n"
" http://a.io/pic.png is an external dependence in article external_link.html" "\n"
" http://a.io/pic.png is an external dependence in article external_image_http.html" "\n"
" https://a.io/pic.png is an external dependence in article external_image_https.html" "\n"
" //a.io/pic.png is an external dependence in article external_image_protocol_relative.html" "\n"
"[INFO] Overall Test Status: Fail" "\n"
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
);
Expand Down Expand Up @@ -732,7 +734,9 @@ const std::string ALL_CHECKS_OUTPUT_ON_POORZIMFILE(
" Found 1 empty links in article: empty_link.html" "\n"
" ../../oops.html is out of bounds. Article: outofbounds_link.html" "\n"
"[ERROR] Invalid external links found:" "\n"
" http://a.io/pic.png is an external dependence in article external_link.html" "\n"
" http://a.io/pic.png is an external dependence in article external_image_http.html" "\n"
" https://a.io/pic.png is an external dependence in article external_image_https.html" "\n"
" //a.io/pic.png is an external dependence in article external_image_protocol_relative.html" "\n"
"[ERROR] Redirect loop(s) exist:" "\n"
" Redirect loop exists from entry redirect_loop.html" "\n"
"" "\n"
Expand Down Expand Up @@ -882,9 +886,23 @@ TEST(zimcheck, json_poorzimfile)
" {" "\n"
" \"check\" : \"url_external\"," "\n"
" \"level\" : \"ERROR\"," "\n"
" \"message\" : \"http://a.io/pic.png is an external dependence in article external_link.html\"," "\n"
" \"message\" : \"http://a.io/pic.png is an external dependence in article external_image_http.html\"," "\n"
" \"link\" : \"http://a.io/pic.png\"," "\n"
" \"path\" : \"external_link.html\"" "\n"
" \"path\" : \"external_image_http.html\"" "\n"
" }," "\n"
" {" "\n"
" \"check\" : \"url_external\"," "\n"
" \"level\" : \"ERROR\"," "\n"
" \"message\" : \"https://a.io/pic.png is an external dependence in article external_image_https.html\"," "\n"
" \"link\" : \"https://a.io/pic.png\"," "\n"
" \"path\" : \"external_image_https.html\"" "\n"
" }," "\n"
" {" "\n"
" \"check\" : \"url_external\"," "\n"
" \"level\" : \"ERROR\"," "\n"
" \"message\" : \"//a.io/pic.png is an external dependence in article external_image_protocol_relative.html\"," "\n"
" \"link\" : \"//a.io/pic.png\"," "\n"
" \"path\" : \"external_image_protocol_relative.html\"" "\n"
" }," "\n"
" {" "\n"
" \"check\" : \"redirect\"," "\n"
Expand Down

0 comments on commit 89b0ae0

Please sign in to comment.