-
Notifications
You must be signed in to change notification settings - Fork 1k
/
Copy pathstream.rs
262 lines (233 loc) · 9.2 KB
/
stream.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
use std::path::Path;
use std::pin::Pin;
use futures::StreamExt;
use rustc_hash::FxHashSet;
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tracing::warn;
use crate::Error;
/// Unzip a `.zip` archive into the target directory, without requiring `Seek`.
///
/// This is useful for unzipping files as they're being downloaded. If the archive
/// is already fully on disk, consider using `unzip_archive`, which can use multiple
/// threads to work faster in that case.
pub async fn unzip<R: tokio::io::AsyncRead + Unpin>(
reader: R,
target: impl AsRef<Path>,
) -> Result<(), Error> {
let target = target.as_ref();
let mut reader = futures::io::BufReader::with_capacity(128 * 1024, reader.compat());
let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader);
let mut directories = FxHashSet::default();
while let Some(mut entry) = zip.next_with_entry().await? {
// Construct the (expected) path to the file on-disk.
let path = entry.reader().entry().filename().as_str()?;
let path = target.join(path);
let is_dir = entry.reader().entry().dir()?;
// Either create the directory or write the file to disk.
if is_dir {
if directories.insert(path.clone()) {
fs_err::tokio::create_dir_all(path).await?;
}
} else {
if let Some(parent) = path.parent() {
if directories.insert(parent.to_path_buf()) {
fs_err::tokio::create_dir_all(parent).await?;
}
}
// We don't know the file permissions here, because we haven't seen the central directory yet.
let file = fs_err::tokio::File::create(&path).await?;
let mut writer =
if let Ok(size) = usize::try_from(entry.reader().entry().uncompressed_size()) {
tokio::io::BufWriter::with_capacity(size, file)
} else {
tokio::io::BufWriter::new(file)
};
let mut reader = entry.reader_mut().compat();
tokio::io::copy(&mut reader, &mut writer).await?;
}
// Close current file to get access to the next one. See docs:
// https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/
zip = entry.skip().await?;
}
// On Unix, we need to set file permissions, which are stored in the central directory, at the
// end of the archive. The `ZipFileReader` reads until it sees a central directory signature,
// which indicates the first entry in the central directory. So we continue reading from there.
#[cfg(unix)]
{
use std::fs::Permissions;
use std::os::unix::fs::PermissionsExt;
let mut directory = async_zip::base::read::cd::CentralDirectoryReader::new(&mut reader);
while let Some(entry) = directory.next().await? {
if entry.dir()? {
continue;
}
let Some(mode) = entry.unix_permissions() else {
continue;
};
// The executable bit is the only permission we preserve, otherwise we use the OS defaults.
// https://github.com/pypa/pip/blob/3898741e29b7279e7bffe044ecfbe20f6a438b1e/src/pip/_internal/utils/unpacking.py#L88-L100
let has_any_executable_bit = mode & 0o111;
if has_any_executable_bit != 0 {
// Construct the (expected) path to the file on-disk.
let path = entry.filename().as_str()?;
let path = target.join(path);
let permissions = fs_err::tokio::metadata(&path).await?.permissions();
fs_err::tokio::set_permissions(
&path,
Permissions::from_mode(permissions.mode() | 0o111),
)
.await?;
}
}
}
Ok(())
}
/// Unpack the given tar archive into the destination directory.
///
/// This is equivalent to `archive.unpack_in(dst)`, but it also preserves the executable bit.
async fn untar_in<R: tokio::io::AsyncRead + Unpin, P: AsRef<Path>>(
archive: &mut tokio_tar::Archive<R>,
dst: P,
) -> std::io::Result<()> {
let mut entries = archive.entries()?;
let mut pinned = Pin::new(&mut entries);
while let Some(entry) = pinned.next().await {
// Unpack the file into the destination directory.
let mut file = entry?;
// On Windows, skip symlink entries, as they're not supported. pip recursively copies the
// symlink target instead.
if cfg!(windows) && file.header().entry_type().is_symlink() {
warn!(
"Skipping symlink in tar archive: {}",
file.path()?.display()
);
continue;
}
file.unpack_in(dst.as_ref()).await?;
// Preserve the executable bit.
#[cfg(unix)]
{
use std::fs::Permissions;
use std::os::unix::fs::PermissionsExt;
let entry_type = file.header().entry_type();
if entry_type.is_file() || entry_type.is_hard_link() {
let mode = file.header().mode()?;
let has_any_executable_bit = mode & 0o111;
if has_any_executable_bit != 0 {
if let Some(path) = crate::tar::unpacked_at(dst.as_ref(), &file.path()?) {
let permissions = fs_err::tokio::metadata(&path).await?.permissions();
fs_err::tokio::set_permissions(
&path,
Permissions::from_mode(permissions.mode() | 0o111),
)
.await?;
}
}
}
}
}
Ok(())
}
/// Unzip a `.tar.gz` archive into the target directory, without requiring `Seek`.
///
/// This is useful for unpacking files as they're being downloaded.
pub async fn untar_gz<R: tokio::io::AsyncRead + Unpin>(
reader: R,
target: impl AsRef<Path>,
) -> Result<(), Error> {
let reader = tokio::io::BufReader::new(reader);
let decompressed_bytes = async_compression::tokio::bufread::GzipDecoder::new(reader);
let mut archive = tokio_tar::ArchiveBuilder::new(decompressed_bytes)
.set_preserve_mtime(false)
.build();
untar_in(&mut archive, target.as_ref()).await?;
Ok(())
}
/// Unzip a `.tar.bz2` archive into the target directory, without requiring `Seek`.
///
/// This is useful for unpacking files as they're being downloaded.
pub async fn untar_bz2<R: tokio::io::AsyncRead + Unpin>(
reader: R,
target: impl AsRef<Path>,
) -> Result<(), Error> {
let reader = tokio::io::BufReader::new(reader);
let decompressed_bytes = async_compression::tokio::bufread::BzDecoder::new(reader);
let mut archive = tokio_tar::ArchiveBuilder::new(decompressed_bytes)
.set_preserve_mtime(false)
.build();
untar_in(&mut archive, target.as_ref()).await?;
Ok(())
}
/// Unzip a `.tar.zst` archive into the target directory, without requiring `Seek`.
///
/// This is useful for unpacking files as they're being downloaded.
pub async fn untar_zst<R: tokio::io::AsyncRead + Unpin>(
reader: R,
target: impl AsRef<Path>,
) -> Result<(), Error> {
let reader = tokio::io::BufReader::new(reader);
let decompressed_bytes = async_compression::tokio::bufread::ZstdDecoder::new(reader);
let mut archive = tokio_tar::ArchiveBuilder::new(decompressed_bytes)
.set_preserve_mtime(false)
.build();
Ok(untar_in(&mut archive, target.as_ref()).await?)
}
/// Unzip a `.zip`, `.tar.gz`, or `.tar.bz2` archive into the target directory, without requiring `Seek`.
pub async fn archive<R: tokio::io::AsyncRead + Unpin>(
reader: R,
source: impl AsRef<Path>,
target: impl AsRef<Path>,
) -> Result<(), Error> {
// `.zip`
if source
.as_ref()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("zip"))
{
unzip(reader, target).await?;
return Ok(());
}
// `.tar.gz`
if source
.as_ref()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("gz"))
&& source.as_ref().file_stem().is_some_and(|stem| {
Path::new(stem)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
})
{
untar_gz(reader, target).await?;
return Ok(());
}
// `.tar.bz2`
if source
.as_ref()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("bz2"))
&& source.as_ref().file_stem().is_some_and(|stem| {
Path::new(stem)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
})
{
untar_bz2(reader, target).await?;
return Ok(());
}
// `.tar.zst`
if source
.as_ref()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("zst"))
&& source.as_ref().file_stem().is_some_and(|stem| {
Path::new(stem)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
})
{
untar_zst(reader, target).await?;
return Ok(());
}
Err(Error::UnsupportedArchive(source.as_ref().to_path_buf()))
}