-
Notifications
You must be signed in to change notification settings - Fork 1.7k
rust: add GCS logdir support #4646
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
948b724
3ab4fac
358687e
d77430a
cf6869c
971c2c2
428aaab
b95ea29
a890789
a65fa71
61240d1
281ba86
0f22707
c32e282
2dcce13
9f812db
9e04486
9d8b1c6
85b3741
6268d67
b2fdbe4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| /* Copyright 2021 The TensorFlow Authors. All Rights Reserved. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| ==============================================================================*/ | ||
|
|
||
| //! Log directory as specified by user arguments. | ||
|
|
||
| use log::error; | ||
| use std::collections::HashMap; | ||
| use std::io::{self, Read}; | ||
| use std::path::PathBuf; | ||
|
|
||
| use crate::disk_logdir::DiskLogdir; | ||
| use crate::gcs; | ||
| use crate::logdir::{EventFileBuf, Logdir}; | ||
| use crate::types::Run; | ||
|
|
||
| /// A logdir dynamically dispatched over supported implementations. | ||
| pub enum DynLogdir { | ||
| Disk(DiskLogdir), | ||
| Gcs(gcs::Logdir), | ||
| } | ||
|
|
||
| /// A file from any one of [`DynLogdir`]'s underlying implementations. | ||
| pub enum DynFile { | ||
| Disk(<DiskLogdir as Logdir>::File), | ||
| Gcs(<gcs::Logdir as Logdir>::File), | ||
| } | ||
|
|
||
| impl DynLogdir { | ||
| /// Parses a `DynLogdir` from a user-supplied path. | ||
| /// | ||
| /// This succeeds unless the path represents a GCS logdir and no HTTP client can be opened. In | ||
| /// case of failure, errors will be logged to the active logger. | ||
| /// | ||
| /// # Panics | ||
| /// | ||
| /// May panic in debug mode if called from a thread with an active Tokio runtime; see | ||
| /// [seanmonstar/reqwest#1017]. | ||
| /// | ||
| /// [seanmonstar/reqwest#1017]: https://github.com/seanmonstar/reqwest/issues/1017 | ||
| pub fn new(path: PathBuf) -> Option<Self> { | ||
| let path_str = path.to_string_lossy(); | ||
| let gcs_path = match path_str.strip_prefix("gs://") { | ||
| // Assume that anything not starting with `gs://` is a path on disk. | ||
| None => return Some(DynLogdir::Disk(DiskLogdir::new(path))), | ||
| Some(p) => p, | ||
| }; | ||
| let mut parts = gcs_path.splitn(2, '/'); | ||
| let bucket = parts.next().unwrap().to_string(); // splitn always yields at least one element | ||
| let prefix = parts.next().unwrap_or("").to_string(); | ||
| let client = match gcs::Client::new() { | ||
| Err(e) => { | ||
| error!("Could not open GCS connection: {}", e); | ||
| return None; | ||
| } | ||
| Ok(c) => c, | ||
| }; | ||
| Some(DynLogdir::Gcs(gcs::Logdir::new(client, bucket, prefix))) | ||
| } | ||
| } | ||
|
|
||
| impl crate::logdir::Logdir for DynLogdir { | ||
| type File = DynFile; | ||
|
|
||
| fn discover(&self) -> io::Result<HashMap<Run, Vec<EventFileBuf>>> { | ||
| match self { | ||
| Self::Disk(x) => x.discover(), | ||
| Self::Gcs(x) => x.discover(), | ||
| } | ||
| } | ||
|
|
||
| fn open(&self, path: &EventFileBuf) -> io::Result<Self::File> { | ||
| match self { | ||
| Self::Disk(x) => x.open(path).map(DynFile::Disk), | ||
| Self::Gcs(x) => x.open(path).map(DynFile::Gcs), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl Read for DynFile { | ||
| fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { | ||
| match self { | ||
| Self::Disk(x) => x.read(buf), | ||
| Self::Gcs(x) => x.read(buf), | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,145 @@ | ||
| /* Copyright 2021 The TensorFlow Authors. All Rights Reserved. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| ==============================================================================*/ | ||
|
|
||
| //! Adapter from GCS to TensorBoard logdirs. | ||
|
|
||
| use log::warn; | ||
| use reqwest::StatusCode; | ||
| use std::collections::HashMap; | ||
| use std::io::{self, BufReader, Read}; | ||
| use std::path::{Path, PathBuf}; | ||
|
|
||
| use super::Client; | ||
| use crate::logdir::{EventFileBuf, EVENT_FILE_BASENAME_INFIX}; | ||
| use crate::types::Run; | ||
|
|
||
| /// A reference to a GCS object with a read offset. | ||
| pub struct File { | ||
| gcs: Client, | ||
| bucket: String, | ||
| object: String, | ||
| pos: u64, | ||
| } | ||
|
|
||
| impl File { | ||
| fn new(gcs: Client, bucket: String, object: String) -> Self { | ||
| Self { | ||
| gcs, | ||
| bucket, | ||
| object, | ||
| pos: 0, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| fn reqwest_to_io_error(e: reqwest::Error) -> io::Error { | ||
| let kind = match e.status() { | ||
| Some(StatusCode::NOT_FOUND) => io::ErrorKind::NotFound, | ||
| Some(StatusCode::FORBIDDEN) => io::ErrorKind::PermissionDenied, | ||
| Some(StatusCode::UNAUTHORIZED) => io::ErrorKind::PermissionDenied, | ||
| Some(StatusCode::REQUEST_TIMEOUT) => io::ErrorKind::TimedOut, | ||
| _ if e.is_timeout() => io::ErrorKind::TimedOut, | ||
| _ if e.is_decode() => io::ErrorKind::InvalidData, | ||
| _ => io::ErrorKind::Other, | ||
| }; | ||
| io::Error::new(kind, e) | ||
| } | ||
|
|
||
| impl Read for File { | ||
| fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { | ||
| if buf.is_empty() { | ||
| return Ok(0); | ||
| } | ||
| let range = self.pos..=self.pos + (buf.len() as u64 - 1); | ||
| let result = self | ||
| .gcs | ||
| .read(&self.bucket, &self.object, range) | ||
| .map_err(reqwest_to_io_error)?; | ||
| (&mut buf[0..result.len()]).copy_from_slice(&result); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I may not be reading this right but it looks like we have 2 copies here - one inside I doubt it's really a big deal performance-wise but if that is indeed happening, it seems slightly nicer if we could do only one copy? E.g. if we were to have our client directly return
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed! I was planning to do this as an optimization in a follow-up, I don’t see an easy way to zero-copy the bytes all the way from the HTTP There is a similar large copy in the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gotcha, fine by me to do it in a follow-up. I may be misunderstanding but isn't it impossible to do a zero-copy transfer of HTTP response bytes into an
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that I don’t see a way to do it with the
// mod reqwest::blocking::response
impl Response {
/// Streams some bytes from the response body directly into the
/// given buffer. Returns the number of bytes written.
fn bytes_into<T: BufMut>(&self, x: &mut T) -> crate::Result<usize>;
}
// mod gcs::client
impl Client {
fn read(&self, bucket, object, buf: impl BufMut) -> self::Result<usize> {
let res = self.http.get(...).send()?;
res.bytes_into(&mut buf)
}
}
// mod gcs::logdir
impl Read for File {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let n = self.gcs.read(bucket, obj, buf)?;
self.pos += n as u64;
Ok(n)
}
}
Or, from a lower-level perspective: at the end of the day, we receive
Correct, and that is indeed not what I am alluding to. |
||
| self.pos += result.len() as u64; | ||
| Ok(result.len()) | ||
| } | ||
| } | ||
|
|
||
| pub struct Logdir { | ||
| gcs: Client, | ||
| bucket: String, | ||
| /// Invariant: `prefix` either is empty or ends with `/`, and thus an event file name should be | ||
| /// joined onto `prefix` to form its full object name. | ||
| prefix: String, | ||
| } | ||
|
|
||
| impl Logdir { | ||
| pub fn new(gcs: Client, bucket: String, mut prefix: String) -> Self { | ||
| if !prefix.is_empty() && !prefix.ends_with('/') { | ||
| prefix.push('/'); | ||
| } | ||
| Self { | ||
| gcs, | ||
| bucket, | ||
| prefix, | ||
| } | ||
| } | ||
| } | ||
|
|
||
/// Buffer capacity in bytes (16 MiB). Reading from GCS in large chunks keeps the number of network
/// round trips low.
const BUFFER_CAPACITY: usize = 16 << 20;
|
|
||
| impl crate::logdir::Logdir for Logdir { | ||
| type File = BufReader<File>; | ||
|
|
||
| fn discover(&self) -> io::Result<HashMap<Run, Vec<EventFileBuf>>> { | ||
| let res = self.gcs.list(&self.bucket, &self.prefix); | ||
| let objects = res.map_err(reqwest_to_io_error)?; | ||
| let mut run_map: HashMap<Run, Vec<EventFileBuf>> = HashMap::new(); | ||
| for name in objects { | ||
| let name = match name.strip_prefix(&self.prefix) { | ||
| Some(x) => x, | ||
| None => { | ||
| warn!( | ||
| "Unexpected object name {:?} with putative prefix {:?}", | ||
| &name, &self.prefix | ||
| ); | ||
| continue; | ||
| } | ||
| }; | ||
| let path = PathBuf::from(name); | ||
| let is_event_file = path.file_name().map_or(false, |n| { | ||
| n.to_string_lossy().contains(EVENT_FILE_BASENAME_INFIX) | ||
| }); | ||
| if !is_event_file { | ||
| continue; | ||
| } | ||
| let mut run_relpath = path | ||
| .parent() | ||
| .map(Path::to_path_buf) | ||
| .unwrap_or_else(PathBuf::new); | ||
| if run_relpath == Path::new("") { | ||
| run_relpath.push("."); | ||
| } | ||
| let run = Run(run_relpath.display().to_string()); | ||
| run_map.entry(run).or_default().push(EventFileBuf(path)); | ||
| } | ||
| Ok(run_map) | ||
| } | ||
|
|
||
| fn open(&self, path: &EventFileBuf) -> io::Result<Self::File> { | ||
| // Paths as returned by `discover` are always valid Unicode. | ||
| let mut object = self.prefix.clone(); | ||
| object.push_str(path.0.to_string_lossy().as_ref()); | ||
| let file = File::new(self.gcs.clone(), self.bucket.clone(), object); | ||
| Ok(BufReader::with_capacity(BUFFER_CAPACITY, file)) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nfelt, re: discussion: you can see from the structure of these types
and implementations how they could be easily macroed into existence—all
except
`DynLogdir::new`, of course. It’s basically “the same thing with more
`$` signs”: https://gist.github.com/wchargin/f44c370bebb61210dbeb11037f0b2d99
I think that I have a weak preference for the explicit implementation,
but the macro’s actually not that bad. It does make new cases a one-line
change. Open to suggestions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Explicit seems clearer to me given especially that we only have 2 implementations now.
What I had in mind when talking about "it would be nice to have a more automatic way to do this" was really more about whether there was something that would generalize not just to additional implementations but also cover the full set of trait methods as well (i.e. the dispatching logic for each method would be autogenerated without even having to include the macro template for each method). So it'd be sort of a compromise position between enums and trait objects, where the downside is that you need both a closed set of types known to the compiler and the trait must be object safe, but the upside is that you have no vtable overhead and you don't have to hand-roll the dispatching code for each trait method.
I'm not even sure that would work for this case where we have the associated type as a complicating factor, but it seems like the sort of thing that would in principle be possible to autogenerate in the basic case.
But this is just hypothesizing; no changes required or expected.