Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 55 additions & 35 deletions src/db/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,35 +63,53 @@ pub struct Blob {
}

pub fn get_path(conn: &Connection, path: &str) -> Option<Blob> {
let rows = conn.query("SELECT path, mime, date_updated, content
FROM files
WHERE path = $1", &[&path]).unwrap();
if let Some(client) = s3_client() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it would be useful to run the database query anyway, and instead of switching on content = 'in-s3', switch on whether the row exists in the table? I know it makes no difference for the production service, but it feels more resilient for anyone who hosts their own version of it and transitions to S3 after being online for a while.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer not to, as it does add some amount of overhead and to an extent makes the code more complex. move-to-s3 should be the preferred route for migrating local installations to S3.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, i'm just thinking about the period between saving S3 credentials into the environment and the moment cratesfyi database move-to-s3 finishes. Any calls to the website in that time will fail because we won't be looking at the database for the files.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true. move-to-s3 is no longer an operation that can run in the background. Since it's likely to never be used again outside local installations though that seems fine -- and if in the future there is a desire to make that happen the code change isn't really that bad.

let res = client.get_object(GetObjectRequest {
bucket: "rust-docs-rs".into(),
key: path.into(),
..Default::default()
}).sync();

if rows.len() == 0 {
None
} else {
let row = rows.get(0);
let mut content = row.get(3);
if content == b"in-s3" {
let client = s3_client();
content = client.and_then(|c| c.get_object(GetObjectRequest {
bucket: "rust-docs-rs".into(),
key: path.into(),
..Default::default()
}).sync().ok()).and_then(|r| r.body).map(|b| {
let mut b = b.into_blocking_read();
let mut content = Vec::new();
b.read_to_end(&mut content).unwrap();
content
}).unwrap();
let res = match res {
Ok(r) => r,
Err(err) => {
debug!("error fetching {}: {:?}", path, err);
return None;
}
};

let mut b = res.body.unwrap().into_blocking_read();
let mut content = Vec::new();
b.read_to_end(&mut content).unwrap();

let last_modified = res.last_modified.unwrap();
let last_modified = time::strptime(&last_modified, "%a, %d %b %Y %H:%M:%S %Z")
.unwrap_or_else(|e| panic!("failed to parse {:?} as timespec: {:?}", last_modified, e))
.to_timespec();

Some(Blob {
path: row.get(0),
mime: row.get(1),
date_updated: row.get(2),
path: path.into(),
mime: res.content_type.unwrap(),
date_updated: last_modified,
content,
})
} else {
let rows = conn.query("SELECT path, mime, date_updated, content
FROM files
WHERE path = $1", &[&path]).unwrap();

if rows.len() == 0 {
None
} else {
let row = rows.get(0);

Some(Blob {
path: row.get(0),
mime: row.get(1),
date_updated: row.get(2),
content: row.get(3),
})
}
}
}

Expand Down Expand Up @@ -188,18 +206,21 @@ pub fn add_path_into_database<P: AsRef<Path>>(conn: &Connection,
)
};

// check if file already exists in database
let rows = try!(conn.query("SELECT COUNT(*) FROM files WHERE path = $1", &[&path]));
// If AWS credentials are configured, don't insert/update the database
if client.is_none() {
// check if file already exists in database
let rows = try!(conn.query("SELECT COUNT(*) FROM files WHERE path = $1", &[&path]));

let content = content.unwrap_or_else(|| "in-s3".to_owned().into());
let content = content.expect("content never None if client is None");

if rows.get(0).get::<usize, i64>(0) == 0 {
try!(trans.query("INSERT INTO files (path, mime, content) VALUES ($1, $2, $3)",
&[&path, &mime, &content]));
} else {
try!(trans.query("UPDATE files SET mime = $2, content = $3, date_updated = NOW() \
WHERE path = $1",
&[&path, &mime, &content]));
if rows.get(0).get::<usize, i64>(0) == 0 {
try!(trans.query("INSERT INTO files (path, mime, content) VALUES ($1, $2, $3)",
&[&path, &mime, &content]));
} else {
try!(trans.query("UPDATE files SET mime = $2, content = $3, date_updated = NOW() \
WHERE path = $1",
&[&path, &mime, &content]));
}
}
}

Expand Down Expand Up @@ -256,8 +277,7 @@ pub fn move_to_s3(conn: &Connection, n: usize) -> Result<usize> {
use ::futures::future::Future;
match rt.block_on(::futures::future::join_all(futures)) {
Ok(paths) => {
let statement = trans.prepare("UPDATE files SET content = E'in-s3' WHERE path = $1")
.unwrap();
let statement = trans.prepare("DELETE FROM files WHERE path = $1").unwrap();
for path in paths {
statement.execute(&[&path]).unwrap();
}
Expand Down
6 changes: 3 additions & 3 deletions src/docbuilder/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ impl Default for DocBuilderOptions {
crates_io_index_path: crates_io_index_path,

chroot_user: "cratesfyi".to_string(),
container_name: "cratesfyi-container".to_string(),
container_name: env::var("CRATESFYI_CONTAINER_NAME").expect("CRATESFYI_CONTAINER_NAME"),

keep_build_directory: false,
skip_if_exists: false,
Expand Down Expand Up @@ -104,9 +104,9 @@ impl DocBuilderOptions {


fn generate_paths(prefix: PathBuf) -> (PathBuf, PathBuf, PathBuf, PathBuf) {

let name = env::var_os("CRATESFYI_CONTAINER_NAME").expect("CRATESFYI_CONTAINER_NAME");
let destination = PathBuf::from(&prefix).join("documentations");
let chroot_path = PathBuf::from(&prefix).join("cratesfyi-container/rootfs");
let chroot_path = PathBuf::from(&prefix).join(&name).join("rootfs");
let crates_io_index_path = PathBuf::from(&prefix).join("crates.io-index");

(prefix, destination, chroot_path, crates_io_index_path)
Expand Down