Skip to content

Commit 074f0c6

Browse files
authored
feat: range request support (#330)
* feat: range request support * cr * tests & fixes * fix etag range headers * rebase cleanup
1 parent 9e0a03e commit 074f0c6

File tree

8 files changed

+680
-84
lines changed

8 files changed

+680
-84
lines changed

iroh-api/src/api.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ impl Api for Iroh {
117117
if out.is_dir() {
118118
yield (relative_path, OutType::Dir);
119119
} else {
120-
let reader = out.pretty(resolver.clone(), Default::default())?;
120+
let reader = out.pretty(resolver.clone(), Default::default(), iroh_resolver::resolver::ResponseClip::NoClip)?;
121121
yield (relative_path, OutType::Reader(Box::new(reader)));
122122
}
123123
}

iroh-gateway/src/client.rs

+28-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::ops::Range;
12
use std::pin::Pin;
23
use std::task::Poll;
34

@@ -14,10 +15,10 @@ use iroh_metrics::{
1415
};
1516
use iroh_resolver::resolver::{
1617
CidOrDomain, ContentLoader, Metadata, Out, OutMetrics, OutPrettyReader, OutType, Resolver,
17-
Source,
18+
ResponseClip, Source,
1819
};
1920
use mime::Mime;
20-
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWrite};
21+
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, AsyncWrite};
2122
use tokio_util::io::ReaderStream;
2223
use tracing::{info, warn};
2324

@@ -46,6 +47,10 @@ impl<T: ContentLoader> PrettyStreamBody<T> {
4647
pub fn get_mime(&self) -> Option<Mime> {
4748
self.2.clone()
4849
}
50+
51+
pub fn get_size(&self) -> Option<u64> {
52+
self.1
53+
}
4954
}
5055

5156
impl<T: ContentLoader + std::marker::Unpin> http_body::Body for PrettyStreamBody<T> {
@@ -93,6 +98,7 @@ impl<T: ContentLoader + std::marker::Unpin> Client<T> {
9398
&self,
9499
path: iroh_resolver::resolver::Path,
95100
start_time: std::time::Instant,
101+
range: Option<Range<u64>>,
96102
) -> Result<(FileResult<T>, Metadata), String> {
97103
info!("get file {}", path);
98104
let res = self
@@ -107,13 +113,27 @@ impl<T: ContentLoader + std::marker::Unpin> Client<T> {
107113
let body = FileResult::Directory(res);
108114
Ok((body, metadata))
109115
} else {
116+
let mut clip = 0;
117+
if let Some(range) = &range {
118+
clip = range.end as usize;
119+
}
110120
let reader = res
111-
.pretty(self.resolver.clone(), OutMetrics { start: start_time })
121+
.pretty(
122+
self.resolver.clone(),
123+
OutMetrics { start: start_time },
124+
ResponseClip::from(clip),
125+
)
112126
.map_err(|e| e.to_string())?;
113127

114128
let mut buf_reader = tokio::io::BufReader::with_capacity(1024 * 1024, reader);
115129
let body_sample = buf_reader.fill_buf().await.map_err(|e| e.to_string())?;
116130
let mime = sniff_content_type(body_sample);
131+
if let Some(range) = range {
132+
buf_reader
133+
.seek(tokio::io::SeekFrom::Start(range.start))
134+
.await
135+
.map_err(|e| e.to_string())?;
136+
}
117137
let stream = ReaderStream::new(buf_reader);
118138

119139
let body = PrettyStreamBody(stream, metadata.size, Some(mime));
@@ -164,8 +184,11 @@ impl<T: ContentLoader + std::marker::Unpin> Client<T> {
164184
Ok(res) => {
165185
let metadata = res.metadata().clone();
166186
record_ttfb_metrics(start_time, &metadata.source);
167-
let reader =
168-
res.pretty(self.resolver.clone(), OutMetrics { start: start_time });
187+
let reader = res.pretty(
188+
self.resolver.clone(),
189+
OutMetrics { start: start_time },
190+
ResponseClip::NoClip,
191+
);
169192
match reader {
170193
Ok(mut reader) => {
171194
let mut bytes = Vec::new();

iroh-gateway/src/handlers.rs

+50-11
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use axum::{
33
body::{self, Body, HttpBody},
44
error_handling::HandleErrorLayer,
55
extract::{Extension, Path, Query},
6-
http::{header::*, StatusCode},
6+
http::{header::*, Request as HttpRequest, StatusCode},
77
response::IntoResponse,
88
routing::get,
99
BoxError, Router,
@@ -28,6 +28,7 @@ use std::{
2828
collections::HashMap,
2929
error::Error,
3030
fmt::Write,
31+
ops::Range,
3132
sync::Arc,
3233
time::{self, Duration},
3334
};
@@ -121,6 +122,7 @@ pub async fn get_handler<T: ContentLoader + std::marker::Unpin>(
121122
Path(params): Path<HashMap<String, String>>,
122123
Query(query_params): Query<GetParams>,
123124
method: http::Method,
125+
http_req: HttpRequest<Body>,
124126
request_headers: HeaderMap,
125127
) -> Result<GatewayResponse, GatewayError> {
126128
inc!(GatewayMetrics::Requests);
@@ -159,6 +161,7 @@ pub async fn get_handler<T: ContentLoader + std::marker::Unpin>(
159161
.parse()
160162
.map_err(|e: anyhow::Error| e.to_string())
161163
.map_err(|e| error(StatusCode::BAD_REQUEST, &e, &state))?;
164+
// TODO: handle 404 or error
162165
let resolved_cid = resolved_path.root();
163166

164167
if handle_only_if_cached(&request_headers, &state, resolved_cid).await? {
@@ -220,9 +223,9 @@ pub async fn get_handler<T: ContentLoader + std::marker::Unpin>(
220223
serve_car_recursive(&req, state, headers, start_time).await
221224
} else {
222225
match req.format {
223-
ResponseFormat::Raw => serve_raw(&req, state, headers, start_time).await,
226+
ResponseFormat::Raw => serve_raw(&req, state, headers, &http_req, start_time).await,
224227
ResponseFormat::Car => serve_car(&req, state, headers, start_time).await,
225-
ResponseFormat::Fs(_) => serve_fs(&req, state, headers, start_time).await,
228+
ResponseFormat::Fs(_) => serve_fs(&req, state, headers, &http_req, start_time).await,
226229
}
227230
}
228231
}
@@ -404,12 +407,18 @@ async fn serve_raw<T: ContentLoader + std::marker::Unpin>(
404407
req: &Request,
405408
state: Arc<State<T>>,
406409
mut headers: HeaderMap,
410+
http_req: &HttpRequest<Body>,
407411
start_time: std::time::Instant,
408412
) -> Result<GatewayResponse, GatewayError> {
413+
let range: Option<Range<u64>> = if http_req.headers().contains_key(RANGE) {
414+
parse_range_header(http_req.headers().get(RANGE).unwrap())
415+
} else {
416+
None
417+
};
409418
// FIXME: we currently only retrieve full cids
410419
let (body, metadata) = state
411420
.client
412-
.get_file(req.resolved_path.clone(), start_time)
421+
.get_file(req.resolved_path.clone(), start_time, range.clone())
413422
.await
414423
.map_err(|e| error(StatusCode::INTERNAL_SERVER_ERROR, &e, &state))?;
415424

@@ -423,8 +432,18 @@ async fn serve_raw<T: ContentLoader + std::marker::Unpin>(
423432
set_content_disposition_headers(&mut headers, &file_name, DISPOSITION_ATTACHMENT);
424433
set_etag_headers(&mut headers, get_etag(&req.cid, Some(req.format.clone())));
425434
add_cache_control_headers(&mut headers, metadata.clone());
426-
add_ipfs_roots_headers(&mut headers, metadata);
427-
response(StatusCode::OK, body, headers)
435+
add_ipfs_roots_headers(&mut headers, metadata.clone());
436+
437+
if let Some(mut capped_range) = range {
438+
if let Some(size) = metadata.size {
439+
capped_range.end = std::cmp::min(capped_range.end, size);
440+
}
441+
add_etag_range(&mut headers, capped_range.clone());
442+
add_content_range_headers(&mut headers, capped_range, metadata.size);
443+
response(StatusCode::PARTIAL_CONTENT, body, headers)
444+
} else {
445+
response(StatusCode::OK, body, headers)
446+
}
428447
}
429448
FileResult::Directory(_) => Err(error(
430449
StatusCode::INTERNAL_SERVER_ERROR,
@@ -445,7 +464,7 @@ async fn serve_car<T: ContentLoader + std::marker::Unpin>(
445464
// FIXME: we currently only retrieve full cids
446465
let (body, metadata) = state
447466
.client
448-
.get_file(req.resolved_path.clone(), start_time)
467+
.get_file(req.resolved_path.clone(), start_time, None)
449468
.await
450469
.map_err(|e| error(StatusCode::INTERNAL_SERVER_ERROR, &e, &state))?;
451470

@@ -509,12 +528,19 @@ async fn serve_fs<T: ContentLoader + std::marker::Unpin>(
509528
req: &Request,
510529
state: Arc<State<T>>,
511530
mut headers: HeaderMap,
531+
http_req: &HttpRequest<Body>,
512532
start_time: std::time::Instant,
513533
) -> Result<GatewayResponse, GatewayError> {
534+
let range: Option<Range<u64>> = if http_req.headers().contains_key(RANGE) {
535+
parse_range_header(http_req.headers().get(RANGE).unwrap())
536+
} else {
537+
None
538+
};
539+
514540
// FIXME: we currently only retrieve full cids
515541
let (body, metadata) = state
516542
.client
517-
.get_file(req.resolved_path.clone(), start_time)
543+
.get_file(req.resolved_path.clone(), start_time, range.clone())
518544
.await
519545
.map_err(|e| error(StatusCode::INTERNAL_SERVER_ERROR, &e, &state))?;
520546

@@ -528,7 +554,9 @@ async fn serve_fs<T: ContentLoader + std::marker::Unpin>(
528554
.try_collect()
529555
.await;
530556
match dir_list {
531-
Ok(dir_list) => serve_fs_dir(&dir_list, req, state, headers, start_time).await,
557+
Ok(dir_list) => {
558+
serve_fs_dir(&dir_list, req, state, headers, http_req, start_time).await
559+
}
532560
Err(e) => {
533561
tracing::warn!("failed to read dir: {:?}", e);
534562
Err(error(
@@ -561,7 +589,17 @@ async fn serve_fs<T: ContentLoader + std::marker::Unpin>(
561589
let content_sniffed_mime = body.get_mime();
562590
add_content_type_headers(&mut headers, &name, content_sniffed_mime);
563591
}
564-
response(StatusCode::OK, body, headers)
592+
593+
if let Some(mut capped_range) = range {
594+
if let Some(size) = metadata.size {
595+
capped_range.end = std::cmp::min(capped_range.end, size);
596+
}
597+
add_etag_range(&mut headers, capped_range.clone());
598+
add_content_range_headers(&mut headers, capped_range, metadata.size);
599+
response(StatusCode::PARTIAL_CONTENT, body, headers)
600+
} else {
601+
response(StatusCode::OK, body, headers)
602+
}
565603
}
566604
None => Err(error(
567605
StatusCode::BAD_REQUEST,
@@ -594,6 +632,7 @@ async fn serve_fs_dir<T: ContentLoader + std::marker::Unpin>(
594632
req: &Request,
595633
state: Arc<State<T>>,
596634
mut headers: HeaderMap,
635+
http_req: &HttpRequest<Body>,
597636
start_time: std::time::Instant,
598637
) -> Result<GatewayResponse, GatewayError> {
599638
let force_dir = req.query_params.force_dir.unwrap_or(false);
@@ -614,7 +653,7 @@ async fn serve_fs_dir<T: ContentLoader + std::marker::Unpin>(
614653
}
615654
let mut new_req = req.clone();
616655
new_req.resolved_path.push("index.html");
617-
return serve_fs(&new_req, state, headers, start_time).await;
656+
return serve_fs(&new_req, state, headers, http_req, start_time).await;
618657
}
619658

620659
headers.insert(CONTENT_TYPE, HeaderValue::from_str("text/html").unwrap());

iroh-gateway/src/headers.rs

+83-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use ::time::OffsetDateTime;
33
use axum::http::header::*;
44
use iroh_resolver::resolver::{CidOrDomain, Metadata, PathType};
55
use mime::Mime;
6-
use std::{fmt::Write, time};
6+
use std::{fmt::Write, ops::Range, time};
77

88
#[tracing::instrument()]
99
pub fn add_user_headers(headers: &mut HeaderMap, user_headers: HeaderMap) {
@@ -63,12 +63,47 @@ pub fn add_content_disposition_headers(
6363

6464
#[tracing::instrument()]
6565
pub fn set_content_disposition_headers(headers: &mut HeaderMap, filename: &str, disposition: &str) {
66+
// TODO: handle non-ascii filenames https://github.com/ipfs/specs/blob/main/http-gateways/PATH_GATEWAY.md#content-disposition-response-header
6667
headers.insert(
6768
CONTENT_DISPOSITION,
6869
HeaderValue::from_str(&format!("{}; filename={}", disposition, filename)).unwrap(),
6970
);
7071
}
7172

73+
#[tracing::instrument()]
74+
pub fn add_content_range_headers(headers: &mut HeaderMap, range: Range<u64>, size: Option<u64>) {
75+
if range.end == 0 {
76+
// this should never happen as it is checked for in parse_range_header
77+
// but just to avoid any footguns
78+
return;
79+
}
80+
let content_range = if let Some(size) = size {
81+
format!("bytes {}-{}/{}", range.start, range.end - 1, size)
82+
} else {
83+
format!("bytes {}-{}/{}", range.start, range.end - 1, "*")
84+
};
85+
headers.insert(
86+
CONTENT_RANGE,
87+
HeaderValue::from_str(&content_range).unwrap(),
88+
);
89+
}
90+
91+
pub fn parse_range_header(range: &HeaderValue) -> Option<Range<u64>> {
92+
// TODO: potentially support multiple ranges ie bytes=0-100,200-300
93+
let range = range.to_str().ok()?;
94+
let mut parts = range.splitn(2, '=');
95+
if parts.next() != Some("bytes") {
96+
return None;
97+
}
98+
let mut range = parts.next()?.splitn(2, '-');
99+
let start = range.next()?.parse().ok()?;
100+
let end = range.next()?.parse().ok()?;
101+
if start >= end || end == 0 {
102+
return None;
103+
}
104+
Some(Range { start, end })
105+
}
106+
72107
#[tracing::instrument()]
73108
pub fn add_cache_control_headers(headers: &mut HeaderMap, metadata: Metadata) {
74109
if metadata.path.typ() == PathType::Ipns {
@@ -98,6 +133,16 @@ pub fn set_etag_headers(headers: &mut HeaderMap, etag: String) {
98133
headers.insert(ETAG, HeaderValue::from_str(&etag).unwrap());
99134
}
100135

136+
#[tracing::instrument()]
137+
pub fn add_etag_range(headers: &mut HeaderMap, range: Range<u64>) {
138+
if headers.contains_key(ETAG) {
139+
let etag = headers.get(ETAG).unwrap().to_str().unwrap();
140+
let etag = etag.trim_end_matches('"');
141+
let etag = format!("{}.{}-{}\"", etag, range.start, range.end - 1);
142+
headers.insert(ETAG, HeaderValue::from_str(&etag).unwrap());
143+
}
144+
}
145+
101146
#[tracing::instrument()]
102147
pub fn get_etag(cid: &CidOrDomain, response_format: Option<ResponseFormat>) -> String {
103148
match cid {
@@ -231,6 +276,43 @@ mod tests {
231276
);
232277
}
233278

279+
#[test]
280+
fn parse_range_header_test() {
281+
let range = HeaderValue::from_str("bytes=0-10").unwrap();
282+
let r = parse_range_header(&range);
283+
assert_eq!(r, Some(Range { start: 0, end: 10 }));
284+
285+
let range = HeaderValue::from_str("byts=0-10").unwrap();
286+
let r = parse_range_header(&range);
287+
assert_eq!(r, None);
288+
289+
let range = HeaderValue::from_str("bytes=0-").unwrap();
290+
let r = parse_range_header(&range);
291+
assert_eq!(r, None);
292+
293+
let range = HeaderValue::from_str("bytes=10-1").unwrap();
294+
let r = parse_range_header(&range);
295+
assert_eq!(r, None);
296+
297+
let range = HeaderValue::from_str("bytes=0-0").unwrap();
298+
let r = parse_range_header(&range);
299+
assert_eq!(r, None);
300+
301+
let range = HeaderValue::from_str("bytes=100-200").unwrap();
302+
let r = parse_range_header(&range);
303+
assert_eq!(
304+
r,
305+
Some(Range {
306+
start: 100,
307+
end: 200
308+
})
309+
);
310+
311+
let range = HeaderValue::from_str("bytes=0-10,20-30").unwrap();
312+
let r = parse_range_header(&range);
313+
assert_eq!(r, None);
314+
}
315+
234316
#[test]
235317
fn add_content_disposition_headers_test() {
236318
// inline

0 commit comments

Comments
 (0)