Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

File deduplication #6332

Merged
merged 44 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
908a187
File deduplication
Hocuri Dec 11, 2024
28a07e0
--wip-- [skip ci]
Hocuri Dec 12, 2024
7a43aa4
Fix the tests (some of the fixes may need a new test)
Hocuri Dec 13, 2024
9dcaa41
Fix some more tests, I'll need to remove some println statements
Hocuri Dec 13, 2024
260015d
Adapt more tests and fix most of them
Hocuri Dec 18, 2024
a64c72c
test: Assume that the avatar name also changes
Hocuri Dec 18, 2024
e0fdcf4
Adapt summary.rs tests
Hocuri Dec 18, 2024
4c47d4c
Adapt some more tests, they all pass
Hocuri Dec 18, 2024
1698b6b
Adapt src/receive_imf/tests.rs, fails because there is no deduplicati…
Hocuri Dec 18, 2024
33f89f3
Deduplicate on message reception, fix all tests
Hocuri Dec 19, 2024
79b69ac
Small tweaks, clippy
Hocuri Dec 19, 2024
c22e8dc
Set deduplicated files as read-only on the file system
Hocuri Dec 23, 2024
6906db7
Set the file modification time so that it's not deleted during housek…
Hocuri Dec 23, 2024
0beb1c9
Deduplicate the code writing a file
Hocuri Dec 26, 2024
a5c51ae
Use only the first 32 characters of the hash
Hocuri Dec 26, 2024
c2cec63
Keep the code repairing Param::Filename extensions for now
Hocuri Dec 27, 2024
bb81e3b
Some renames, leave `set_file_from_bytes()` being pub for now
Hocuri Dec 27, 2024
d012bc2
Create blob dir if it doesn't exist
Hocuri Dec 27, 2024
c6e32aa
Document and expose via the C ffi
Hocuri Dec 27, 2024
72fe759
Use the actual file's name if `name` is None
Hocuri Dec 27, 2024
c5074ab
Clippy
Hocuri Dec 27, 2024
f05b057
clippy: Make functions that are not async not be async
Hocuri Dec 27, 2024
9935d1f
Fix mistake I made when rebasing
Hocuri Jan 6, 2025
44bbfbe
Documentation
Hocuri Jan 16, 2025
469baf0
create_and_deduplicate_from_bytes: check if the file content is still…
Hocuri Jan 16, 2025
f38c915
Documentation
Hocuri Jan 16, 2025
3204282
clippy
Hocuri Jan 17, 2025
a1b44ab
Fix unit tests on Windows
Hocuri Jan 18, 2025
30be873
Fix python tests
Hocuri Jan 18, 2025
e281aea
Update deltachat-ffi/src/lib.rs
Hocuri Jan 18, 2025
acad86d
Update deltachat-ffi/src/lib.rs
Hocuri Jan 18, 2025
a108e63
Update src/blob.rs
Hocuri Jan 18, 2025
272b7e3
Make create_and_deduplicate_from_bytes() a thin wrapper around create…
Hocuri Jan 18, 2025
f6c019e
Remove redundant & hard to understand comment
Hocuri Jan 18, 2025
ac35c97
clippy
Hocuri Jan 19, 2025
3fa0217
fix(housekeeping): Set files to be readonly before deleting
Hocuri Jan 19, 2025
f97882d
Only set readonly=false on Windows because on Unix, it would make the…
Hocuri Jan 19, 2025
b49b181
Septias' review
Hocuri Jan 19, 2025
1761804
2 small readability fixes
Hocuri Jan 19, 2025
7967437
more Septias review
Hocuri Jan 20, 2025
6f6825b
Copy files to the blobdir first if they are not already there
Hocuri Jan 20, 2025
4aba5a8
Merge remote-tracking branch 'origin/main' into hoc/file-deduplication
Hocuri Jan 21, 2025
9bc4071
Don't set blob files as read-only
Hocuri Jan 21, 2025
a7d138f
Update src/blob.rs
Hocuri Jan 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ toml = "0.8"
url = "2"
uuid = { version = "1", features = ["serde", "v4"] }
webpki-roots = "0.26.7"
blake3 = "1.5.5"

[dev-dependencies]
anyhow = { workspace = true, features = ["backtrace"] } # Enable `backtrace` feature in tests.
Expand Down
25 changes: 25 additions & 0 deletions deltachat-ffi/deltachat.h
Original file line number Diff line number Diff line change
Expand Up @@ -4756,6 +4756,31 @@ void dc_msg_set_override_sender_name(dc_msg_t* msg, const char* name)
void dc_msg_set_file (dc_msg_t* msg, const char* file, const char* filemime);


/**
* Sets the file associated with a message.
*
* If `name` is non-null, it is used as the file name
* and the actual current name of the file is ignored.
*
* If the source file is already in the blobdir, it will be renamed,
* otherwise it will be copied to the blobdir first.
*
* In order to deduplicate files that contain the same data,
* the file will be named as a hash of the file data.
*
* NOTE:
* - This function will rename the file. To get the new file path, call `dc_msg_get_file()`.
* - The file must not be modified after this function was called.
*
* @memberof dc_msg_t
* @param msg The message object. Must not be NULL.
* @param file The path of the file to attach. Must not be NULL.
* @param name The original filename of the attachment. If NULL, the current name of `file` will be used instead.
* @param filemime The MIME type of the file. NULL if you don't know or don't care.
*/
void dc_msg_set_file_and_deduplicate(dc_msg_t* msg, const char* file, const char* name, const char* filemime);


/**
* Set the dimensions associated with message object.
* Typically this is the width and the height of an image or video associated using dc_msg_set_file().
Expand Down
27 changes: 27 additions & 0 deletions deltachat-ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3835,6 +3835,33 @@ pub unsafe extern "C" fn dc_msg_set_file(
)
}

/// C FFI entry point behind `dc_msg_set_file_and_deduplicate()`.
///
/// Forwards to `set_file_and_deduplicate()` on the message wrapped by `msg`.
/// A NULL `msg` or `file` makes the call a no-op apart from a diagnostic
/// printed to stderr. `name` and `filemime` are optional and are converted
/// with `to_opt_string_lossy()`.
///
/// Errors from the underlying call are not returned to the caller: they are
/// annotated with "Failed to set file", logged through the message's context
/// and then discarded.
#[no_mangle]
pub unsafe extern "C" fn dc_msg_set_file_and_deduplicate(
    msg: *mut dc_msg_t,
    file: *const libc::c_char,
    name: *const libc::c_char,
    filemime: *const libc::c_char,
) {
    if msg.is_null() || file.is_null() {
        eprintln!("ignoring careless call to dc_msg_set_file_and_deduplicate()");
        return;
    }
    let ffi_msg = &mut *msg;
    let ctx = &*ffi_msg.context;

    // Convert the C arguments up front, in the same order as they are passed on.
    let path = as_path(file);
    let display_name = to_opt_string_lossy(name);
    let mime = to_opt_string_lossy(filemime);

    let outcome = ffi_msg
        .message
        .set_file_and_deduplicate(ctx, path, display_name.as_deref(), mime.as_deref())
        .context("Failed to set file");

    // The C signature returns void, so the error can only be logged.
    outcome.log_err(ctx).ok();
}

#[no_mangle]
pub unsafe extern "C" fn dc_msg_set_dimension(
msg: *mut dc_msg_t,
Expand Down
7 changes: 5 additions & 2 deletions python/src/deltachat/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@ def set_html(self, html_text):

@props.with_doc
def filename(self):
"""filename if there was an attachment, otherwise empty string."""
"""file path if there was an attachment, otherwise empty string.
If you want to get the file extension or a user-visible string,
use `basename` instead."""
return from_dc_charpointer(lib.dc_msg_get_file(self._dc_msg))

def set_file(self, path, mime_type=None):
Expand All @@ -120,7 +122,8 @@ def set_file(self, path, mime_type=None):

@props.with_doc
def basename(self) -> str:
"""basename of the attachment if it exists, otherwise empty string."""
"""The user-visible name of the attachment (incl. extension)
if it exists, otherwise empty string."""
# FIXME, it does not return basename
return from_dc_charpointer(lib.dc_msg_get_filename(self._dc_msg))

Expand Down
15 changes: 8 additions & 7 deletions python/tests/test_1_online.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,15 +181,16 @@ def send_and_receive_message():
msg = send_and_receive_message()
assert msg.text == "withfile"
assert open(msg.filename).read() == "some data"
msg.filename.index(basename)
assert msg.filename.endswith(ext)
msg.basename.index(basename)
assert msg.basename.endswith(ext)

msg2 = send_and_receive_message()
assert msg2.text == "withfile"
assert open(msg2.filename).read() == "some data"
msg2.filename.index(basename)
assert msg2.filename.endswith(ext)
assert msg.filename != msg2.filename
msg2.basename.index(basename)
assert msg2.basename.endswith(ext)
assert msg.filename == msg2.filename # The file is deduplicated
assert msg.basename == msg2.basename


def test_send_file_html_attachment(tmp_path, acfactory, lp):
Expand All @@ -214,8 +215,8 @@ def test_send_file_html_attachment(tmp_path, acfactory, lp):
msg = ac2.get_message_by_id(ev.data2)

assert open(msg.filename).read() == content
msg.filename.index(basename)
assert msg.filename.endswith(ext)
msg.basename.index(basename)
assert msg.basename.endswith(ext)


def test_html_message(acfactory, lp):
Expand Down
Loading
Loading