Skip to content

Commit b792a00

Browse files
jsha authored and jyn514 committed
Set a minimum for last-modified in sitemap
This should encourage Google to recrawl pages that now use the /latest/ URLs and have canonicalization links.
1 parent 36cca45 commit b792a00

File tree

1 file changed

+24
-1
lines changed

1 file changed

+24
-1
lines changed

src/web/sitemap.rs

Lines changed: 24 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ use crate::{
66
web::error::Nope,
77
web::page::WebPage,
88
};
9-
use chrono::{DateTime, Utc};
9+
use chrono::{DateTime, TimeZone, Utc};
1010
use iron::{
1111
headers::ContentType,
1212
mime::{Mime, SubLevel, TopLevel},
@@ -86,6 +86,9 @@ pub fn sitemap_handler(req: &mut Request) -> IronResult<Response> {
8686
target_name: row.get("target_name"),
8787
last_modified: row
8888
.get::<_, DateTime<Utc>>("release_time")
89+
// On Aug 27 2022 we added `<link rel="canonical">` to all pages,
90+
// so they should all get recrawled if they haven't been since then.
91+
.max(Utc.ymd(2022, 8, 28).and_hms(0, 0, 0))
8992
.format("%+")
9093
.to_string(),
9194
})
@@ -227,6 +230,26 @@ mod tests {
227230
})
228231
}
229232

233+
#[test]
234+
fn sitemap_max_age() {
235+
wrapper(|env| {
236+
let web = env.frontend();
237+
238+
use chrono::{TimeZone, Utc};
239+
env.fake_release()
240+
.name("some_random_crate")
241+
.release_time(Utc.ymd(2020, 1, 1).and_hms(0, 0, 0))
242+
.create()?;
243+
244+
let response = web.get("/-/sitemap/s/sitemap.xml").send()?;
245+
assert!(response.status().is_success());
246+
247+
let content = response.text()?;
248+
assert!(content.contains(&"2022-08-28T00:00:00+00:00"));
249+
Ok(())
250+
})
251+
}
252+
230253
#[test]
231254
fn about_page() {
232255
wrapper(|env| {

0 commit comments

Comments
 (0)