Skip to content

Commit

Permalink
Always get the latest page from the Wayback Machine
Browse files Browse the repository at this point in the history
By clearing the timestamp value in the Wayback query (the key is kept, its value is set to empty), we always get the latest saved page.
fix brave/brave-browser#14843
  • Loading branch information
simonhong committed Mar 22, 2021
1 parent 10bdab4 commit 31413cd
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 6 deletions.
12 changes: 12 additions & 0 deletions components/brave_wayback_machine/brave_wayback_machine_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,15 @@ bool IsWaybackMachineDisabledFor(const GURL& url) {

return false;
}

// Returns |url| with the value of its "timestamp" query parameter blanked out.
//
// The parameter key itself is kept (it ends up with an empty value); only the
// value is invalidated so that the wayback machine always hands back the
// latest saved page. URLs that carry no "timestamp" key are returned as-is.
GURL FixupWaybackQueryURL(const GURL& url) {
  std::string existing_timestamp;  // Only needed to satisfy the lookup API.
  const bool has_timestamp =
      net::GetValueForKeyInQuery(url, "timestamp", &existing_timestamp);
  if (!has_timestamp)
    return url;

  return net::AppendOrReplaceQueryParameter(url, "timestamp", "");
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@
class GURL;

bool IsWaybackMachineDisabledFor(const GURL& url);
// Returns a copy of |url| in which the value of the "timestamp" query
// parameter, if present, is replaced with an empty string so the wayback
// query asks for the latest saved page. |url| is returned unchanged when it
// has no "timestamp" parameter.
GURL FixupWaybackQueryURL(const GURL& url);

#endif // BRAVE_COMPONENTS_BRAVE_WAYBACK_MACHINE_BRAVE_WAYBACK_MACHINE_UTILS_H_
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

#include <string>

#include "brave/components/brave_wayback_machine/brave_wayback_machine_utils.h"
#include "brave/components/brave_wayback_machine/url_constants.h"
#include "net/base/url_util.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "url/gurl.h"

Expand All @@ -23,3 +27,18 @@ TEST(BraveWaybackMachineUtilsTest, LocalHostDisabledTest) {
EXPECT_FALSE(
IsWaybackMachineDisabledFor(GURL("https://archive.org/foobar.html")));
}

// Verifies that FixupWaybackQueryURL() keeps the "timestamp" key in the
// wayback query but blanks out its value.
TEST(BraveWaybackMachineUtilsTest, FixupQueryURLTest) {
  const std::string query_spec =
      std::string(kWaybackQueryURL) +
      "https://www.example.com?&timestamp=20160101";
  GURL query_url(query_spec);

  // Precondition: the timestamp is visible in the unmodified query.
  std::string timestamp;
  EXPECT_TRUE(net::GetValueForKeyInQuery(query_url, "timestamp", &timestamp));
  EXPECT_EQ("20160101", timestamp);

  // After the fixup the key must still be found, now with an empty value.
  query_url = FixupWaybackQueryURL(query_url);
  EXPECT_TRUE(net::GetValueForKeyInQuery(query_url, "timestamp", &timestamp));
  EXPECT_EQ("", timestamp);
}
11 changes: 5 additions & 6 deletions components/brave_wayback_machine/wayback_machine_url_fetcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@

#include "base/bind.h"
#include "base/json/json_reader.h"
#include "brave/components/brave_wayback_machine/brave_wayback_machine_utils.h"
#include "brave/components/brave_wayback_machine/url_constants.h"
#include "net/traffic_annotation/network_traffic_annotation.h"
#include "net/base/load_flags.h"
#include "net/traffic_annotation/network_traffic_annotation.h"
#include "services/network/public/cpp/resource_request.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "services/network/public/cpp/simple_url_loader.h"
Expand Down Expand Up @@ -52,14 +53,12 @@ WaybackMachineURLFetcher::WaybackMachineURLFetcher(
url_loader_factory_(std::move(url_loader_factory)) {
}

WaybackMachineURLFetcher::~WaybackMachineURLFetcher() {
}
// Out-of-line defaulted destructor; no custom teardown is performed here.
WaybackMachineURLFetcher::~WaybackMachineURLFetcher() = default;

void WaybackMachineURLFetcher::Fetch(const GURL& url) {
auto request = std::make_unique<network::ResourceRequest>();
std::string wayback_fetch_url =
std::string(kWaybackQueryURL) + url.spec();
request->url = GURL(wayback_fetch_url);
const GURL wayback_fetch_url(std::string(kWaybackQueryURL) + url.spec());
request->url = FixupWaybackQueryURL(wayback_fetch_url);
request->credentials_mode = network::mojom::CredentialsMode::kOmit;
request->load_flags = net::LOAD_DO_NOT_SAVE_COOKIES;
wayback_url_loader_ = network::SimpleURLLoader::Create(
Expand Down

0 comments on commit 31413cd

Please sign in to comment.