Skip to content

Commit

Permalink
feat(layers/mime-guess): add a layer that can automatically set `Cont…
Browse files Browse the repository at this point in the history
…ent-Type` based on the extension in the path. (#4912)

* feat(layers/mime-guess): Add a layer that can automatically set `Content-Type` based on the extension in the path.

* remove `Copy` for `MimeGuessLayer`

* use non_exhaustive unit-like struct instead of a tuple struct

* update `opwrite_with_mime` and `rpstat_with_mime` with early return

* remove control_group in tests

* tests: `op_guess` -> `op`

* remove unnecessary listings of public APIs from the doc.
  • Loading branch information
czy-29 authored Jul 19, 2024
1 parent 7985cfd commit d9d4279
Show file tree
Hide file tree
Showing 4 changed files with 235 additions and 0 deletions.
1 change: 1 addition & 0 deletions core/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ executors-tokio = ["tokio/rt"]
layers-chaos = ["dep:rand"]
# Enable layers metrics support
layers-metrics = ["dep:metrics"]
# Enable layers mime_guess support
layers-mime-guess = ["dep:mime_guess"]
# Enable layers prometheus support, with tikv/prometheus-rs crate
layers-prometheus = ["dep:prometheus"]
# Enable layers prometheus support, with prometheus-client crate
Expand Down Expand Up @@ -359,6 +361,8 @@ await-tree = { version = "0.1.1", optional = true }
governor = { version = "0.6.0", optional = true, features = ["std"] }
# for layers-metrics
metrics = { version = "0.23", optional = true }
# for layers-mime-guess
mime_guess = { version = "2.0.5", optional = true }
# for layers-fastrace
fastrace = { version = "0.6", optional = true }
# for layers-opentelemetry
Expand Down
225 changes: 225 additions & 0 deletions core/src/layers/mime_guess.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::{raw::*, Result};

/// A layer that can automatically set `Content-Type` based on the file extension in the path.
///
/// # MimeGuess
///
/// This layer uses [mime_guess](https://crates.io/crates/mime_guess) to automatically
/// set `Content-Type` based on the file extension in the operation path.
///
/// The guess is applied in two places:
///
/// - on `write`, when the write arguments carry no `content_type`;
/// - on `stat`, when the metadata returned by the backend carries no `content_type`.
///
/// This layer never overwrites a `content_type` you set manually, nor one provided
/// by the backend service.
///
/// A simple example is that for object storage backends, when you call `stat`, the backend will
/// provide `content_type` information, and `mime_guess` will not be called, but will use
/// the `content_type` provided by the backend.
///
/// But if you use the [Fs](../services/struct.Fs.html) backend to call `stat`, the backend will
/// not provide `content_type` information, and our `mime_guess` will be called to provide you with
/// appropriate `content_type` information.
///
/// Another thing to note is that using this layer does not guarantee that the result will
/// contain `content_type` information. If the extension of your path is custom or an uncommon type,
/// the returned result will still not contain `content_type` information (the specific condition
/// here is when `mime_guess::from_path(path)` followed by
/// [MimeGuess::first_raw](https://docs.rs/mime_guess/latest/mime_guess/struct.MimeGuess.html#method.first_raw)
/// returns `None`).
///
/// # Examples
///
/// ```no_run
/// use opendal::layers::MimeGuessLayer;
/// use opendal::services;
/// use opendal::Operator;
///
/// let _ = Operator::new(services::Memory::default())
///     .expect("must init")
///     .layer(MimeGuessLayer::default())
///     .finish();
/// ```
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub struct MimeGuessLayer {}

/// Wraps any accessor in a [`MimeGuessAccessor`].
impl<A: Access> Layer<A> for MimeGuessLayer {
    type LayeredAccess = MimeGuessAccessor<A>;

    /// Takes ownership of `accessor` and returns it wrapped in the
    /// mime-guessing accessor. The layer itself holds no state.
    fn layer(&self, accessor: A) -> Self::LayeredAccess {
        MimeGuessAccessor(accessor)
    }
}

/// Accessor produced by [`MimeGuessLayer`].
///
/// It is a thin wrapper around the inner accessor `A`, which is stored as the
/// only (tuple) field.
#[derive(Clone, Debug)]
pub struct MimeGuessAccessor<A: Access>(A);

/// Guesses a MIME type for `path` using `mime_guess`.
///
/// Returns the first raw MIME string `mime_guess` reports for the path, or
/// `None` when it has no candidate.
fn mime_from_path(path: &str) -> Option<&str> {
    let guess = mime_guess::from_path(path);
    guess.first_raw()
}

/// Fills in the `content_type` of write arguments from the path when absent.
///
/// - If `op` already carries a `content_type`, it is returned untouched and no
///   guess is attempted.
/// - Otherwise, if a MIME type can be guessed from `path`, it is set on `op`.
/// - Otherwise `op` is returned untouched.
fn opwrite_with_mime(path: &str, op: OpWrite) -> OpWrite {
    // Only consult the guesser when the caller did not choose a type.
    let guessed = match op.content_type() {
        Some(_) => None,
        None => mime_from_path(path),
    };

    match guessed {
        Some(mime) => op.with_content_type(mime),
        None => op,
    }
}

/// Fills in the `content_type` of a stat reply from the path when absent.
///
/// The reply's metadata is passed through `map_metadata`:
///
/// - If it already carries a `content_type`, it is kept as is and no guess is
///   attempted.
/// - Otherwise, if a MIME type can be guessed from `path`, it is set on the
///   metadata.
/// - Otherwise the metadata is returned untouched.
fn rpstat_with_mime(path: &str, rp: RpStat) -> RpStat {
    rp.map_metadata(|meta| {
        // Only consult the guesser when the metadata has no type yet.
        let guessed = match meta.content_type() {
            Some(_) => None,
            None => mime_from_path(path),
        };

        match guessed {
            Some(mime) => meta.with_content_type(mime.into()),
            None => meta,
        }
    })
}

/// Forwards operations to the wrapped accessor.
///
/// `write` / `blocking_write` pass their arguments through
/// [`opwrite_with_mime`] first, and `stat` / `blocking_stat` pass a successful
/// reply through [`rpstat_with_mime`]. `read` and `list` (and their blocking
/// variants) are forwarded unchanged.
impl<A: Access> LayeredAccess for MimeGuessAccessor<A> {
    type Inner = A;
    type Reader = A::Reader;
    type BlockingReader = A::BlockingReader;
    type Writer = A::Writer;
    type BlockingWriter = A::BlockingWriter;
    type Lister = A::Lister;
    type BlockingLister = A::BlockingLister;

    fn inner(&self) -> &Self::Inner {
        &self.0
    }

    // ---- operations that are intercepted ----

    async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> {
        let args = opwrite_with_mime(path, args);
        self.inner().write(path, args).await
    }

    fn blocking_write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> {
        let args = opwrite_with_mime(path, args);
        self.inner().blocking_write(path, args)
    }

    async fn stat(&self, path: &str, args: OpStat) -> Result<RpStat> {
        // Errors from the inner accessor are propagated as they are.
        let rp = self.inner().stat(path, args).await?;
        Ok(rpstat_with_mime(path, rp))
    }

    fn blocking_stat(&self, path: &str, args: OpStat) -> Result<RpStat> {
        // Errors from the inner accessor are propagated as they are.
        let rp = self.inner().blocking_stat(path, args)?;
        Ok(rpstat_with_mime(path, rp))
    }

    // ---- operations that are forwarded unchanged ----

    async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> {
        self.inner().read(path, args).await
    }

    fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> {
        self.inner().blocking_read(path, args)
    }

    async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> {
        self.inner().list(path, args).await
    }

    fn blocking_list(&self, path: &str, args: OpList) -> Result<(RpList, Self::BlockingLister)> {
        self.inner().blocking_list(path, args)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{services::Memory, Metakey, Operator};

    const DATA: &str = "<html>test</html>";
    const CUSTOM: &str = "text/custom";
    const HTML: &str = "text/html";

    /// Content types expected from the listing, by position in the result:
    /// `test0.html`, `test1.asdfghjkl`, `test2.html`.
    const LISTED: [Option<&str>; 3] = [Some(HTML), None, Some(CUSTOM)];

    /// Exercises the layer through the async operator API.
    #[tokio::test]
    async fn test_async() {
        let op = Operator::new(Memory::default())
            .unwrap()
            .layer(MimeGuessLayer::default())
            .finish();

        // Known extension: the type is guessed.
        op.write("test0.html", DATA).await.unwrap();
        let meta = op.stat("test0.html").await.unwrap();
        assert_eq!(meta.content_type(), Some(HTML));

        // Unknown extension: nothing is guessed.
        op.write("test1.asdfghjkl", DATA).await.unwrap();
        let meta = op.stat("test1.asdfghjkl").await.unwrap();
        assert_eq!(meta.content_type(), None);

        // Explicit content type: it is kept.
        op.write_with("test2.html", DATA)
            .content_type(CUSTOM)
            .await
            .unwrap();
        let meta = op.stat("test2.html").await.unwrap();
        assert_eq!(meta.content_type(), Some(CUSTOM));

        let entries = op.list_with("").metakey(Metakey::Complete).await.unwrap();
        for (idx, want) in LISTED.iter().enumerate() {
            assert_eq!(entries[idx].metadata().content_type(), *want);
        }
    }

    /// Exercises the layer through the blocking operator API.
    #[test]
    fn test_blocking() {
        let op = Operator::new(Memory::default())
            .unwrap()
            .layer(MimeGuessLayer::default())
            .finish()
            .blocking();

        // Known extension: the type is guessed.
        op.write("test0.html", DATA).unwrap();
        let meta = op.stat("test0.html").unwrap();
        assert_eq!(meta.content_type(), Some(HTML));

        // Unknown extension: nothing is guessed.
        op.write("test1.asdfghjkl", DATA).unwrap();
        let meta = op.stat("test1.asdfghjkl").unwrap();
        assert_eq!(meta.content_type(), None);

        // Explicit content type: it is kept.
        op.write_with("test2.html", DATA)
            .content_type(CUSTOM)
            .call()
            .unwrap();
        let meta = op.stat("test2.html").unwrap();
        assert_eq!(meta.content_type(), Some(CUSTOM));

        let entries = op.list_with("").metakey(Metakey::Complete).call().unwrap();
        for (idx, want) in LISTED.iter().enumerate() {
            assert_eq!(entries[idx].metadata().content_type(), *want);
        }
    }
}
5 changes: 5 additions & 0 deletions core/src/layers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ mod metrics;
#[cfg(feature = "layers-metrics")]
pub use self::metrics::MetricsLayer;

#[cfg(feature = "layers-mime-guess")]
mod mime_guess;
#[cfg(feature = "layers-mime-guess")]
pub use self::mime_guess::MimeGuessLayer;

#[cfg(feature = "layers-prometheus")]
mod prometheus;
#[cfg(feature = "layers-prometheus")]
Expand Down

0 comments on commit d9d4279

Please sign in to comment.