diff --git a/deepwell/Cargo.lock b/deepwell/Cargo.lock index a8eaf09ed5..0c09c692e9 100644 --- a/deepwell/Cargo.lock +++ b/deepwell/Cargo.lock @@ -690,7 +690,7 @@ dependencies = [ [[package]] name = "deepwell" -version = "2024.9.14" +version = "2024.10.6" dependencies = [ "anyhow", "argon2", diff --git a/deepwell/Cargo.toml b/deepwell/Cargo.toml index 3e72bbb702..1414eab3f8 100644 --- a/deepwell/Cargo.toml +++ b/deepwell/Cargo.toml @@ -8,7 +8,7 @@ keywords = ["wikijump", "api", "backend", "wiki"] categories = ["asynchronous", "database", "web-programming::http-server"] exclude = [".gitignore", ".editorconfig"] -version = "2024.9.14" +version = "2024.10.6" authors = ["Emmie Smith "] edition = "2021" diff --git a/deepwell/README.md b/deepwell/README.md index b72631beed..e89034a279 100644 --- a/deepwell/README.md +++ b/deepwell/README.md @@ -90,6 +90,60 @@ $ cargo fmt # Ensure code is formatted $ cargo clippy # Check code for lints ``` +### Running requests + +When you have a local instance of DEEPWELL running, probably in the development `docker-compose` instance, you may want to run requests against it. You can easily accomplish this with a tool like `curl`. The basic format is: + +```sh +$ curl -X POST --json '{"jsonrpc":"2.0","method":"<method>","params":<params>,"id":<id>}' http://localhost:2747/jsonrpc +``` + +Here you pass in the JSONRPC method name and the corresponding JSON data. The ID value distinguishes requests from notifications; see the JSONRPC specification for more information. + +For instance: + +```sh +$ curl -X POST --json '{"jsonrpc":"2.0","method":"echo","params":{"my":["json","data"]},"id":0}' http://localhost:2747/jsonrpc + +{"jsonrpc":"2.0","id":0,"result":{"my":["json","data"]}} + +$ curl -X POST --json '{"jsonrpc":"2.0","method":"ping","id":0}' http://localhost:2747/jsonrpc + +{"jsonrpc":"2.0","id":0,"result":"Pong!"} ``` + +If you are unfamiliar with JSONRPC, you can read about it [on its website](https://www.jsonrpc.org/specification). For instance, one quirk is that for methods which take a single argument that is neither a list nor an object, you specify it as a list of one element. + +There is also a helper script to assist with making JSONRPC requests, `scripts/request.py`. It requires the popular [`requests`](https://requests.readthedocs.io/) library to be installed. + +Example usage: + +```sh +$ scripts/request.py echo '{ "my": ["json","data"] }' +OK {'my': ['json', 'data']} + +$ scripts/request.py ping +OK Pong! + +$ scripts/request.py error +ERR +{'code': 4000, + 'data': None, + 'message': 'The request is in some way malformed or incorrect'} +``` + +**NOTE:** When you are uploading files to local MinIO as part of testing file upload flows, **you must leave the URL unmodified**. The presigned URL uses `files` as the S3 host, which is not a valid host on your development machine, so you must use `--connect-to` to tell `curl` to connect to the appropriate location instead: + +```sh +$ curl --connect-to files:9000:localhost:9000 --upload-file <file> <presigned-url> +``` + +Alternatively, you can use the helper script: + +```sh +$ scripts/upload.sh <file> <presigned-url> +``` + ### Database There are two important directories related to the management of the database (which DEEPWELL can be said to "own"). They are both fairly self-explanatory: diff --git a/deepwell/config.example.toml b/deepwell/config.example.toml index 69ee918a70..65d4befa49 100644 --- a/deepwell/config.example.toml +++ b/deepwell/config.example.toml @@ -355,6 +355,28 @@ minimum-name-bytes = 3 # Set to 0 to disable.
refill-name-change-days = 90 + +[file] + +# The length of paths used for S3 presigned URLs. +# +# The value doesn't particularly matter so long as it is sufficiently long +# to avoid collisions. +# +# Just to be safe, the generation mechanism is the same as for session tokens. +presigned-path-length = 32 + +# How long a presigned URL lasts before expiry. +# +# The value should only be a few minutes, and no longer than 12 hours. +presigned-expiration-minutes = 5 + +# The maximum blob size allowed globally, in KiB. +maximum-blob-size-kb = 1_048_576 + +# The maximum blob size allowed for user avatars, in KiB. +maximum-avatar-size-kb = 250 + [message] # The maximum size of a message's subject line, in bytes. diff --git a/deepwell/migrations/20220906103252_deepwell.sql b/deepwell/migrations/20220906103252_deepwell.sql index d05b602bd9..d6ff7f3551 100644 --- a/deepwell/migrations/20220906103252_deepwell.sql +++ b/deepwell/migrations/20220906103252_deepwell.sql @@ -411,6 +411,26 @@ CREATE TABLE page_vote ( CHECK ((disabled_at IS NULL) = (disabled_by IS NULL)) ); +-- +-- Blobs +-- + +-- Manages blobs that are being uploaded by the user +CREATE TABLE blob_pending ( + external_id TEXT PRIMARY KEY, + created_by BIGINT NOT NULL REFERENCES "user"(user_id), + created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), + expires_at TIMESTAMP WITH TIME ZONE NOT NULL, + expected_length BIGINT NOT NULL CHECK (expected_length >= 0), + s3_path TEXT NOT NULL CHECK (length(s3_path) > 1), + s3_hash BYTEA, -- NULL means not yet moved, NOT NULL means deleted from s3_path + presign_url TEXT NOT NULL CHECK (length(presign_url) > 1), + + CHECK (expires_at > created_at), -- expiration time is not in the relative past + CHECK (length(external_id) = 24), -- default length for a cuid2 + CHECK (s3_hash IS NULL OR length(s3_hash) = 64) -- SHA-512 hash size, if present +); + -- -- Files -- @@ -514,7 +534,7 @@ CREATE TYPE message_recipient_type AS ENUM ( -- A "record" is the underlying message data, with its contents, attachments, -- and associated metadata such as sender and recipient(s). 
CREATE TABLE message_record ( - external_id TEXT PRIMARY KEY, + external_id TEXT PRIMARY KEY, -- ID comes from message_draft created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(), drafted_at TIMESTAMP WITH TIME ZONE NOT NULL, retracted_at TIMESTAMP WITH TIME ZONE, diff --git a/deepwell/scripts/request.py b/deepwell/scripts/request.py new file mode 100755 index 0000000000..0accb2dd90 --- /dev/null +++ b/deepwell/scripts/request.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import sys + +import requests + + +def color_settings(value): + match value: + case "auto": + fd = sys.stdout.fileno() + return os.isatty(fd) + case "always": + return True + case "never": + return False + + +def print_data(data): + if isinstance(data, str): + print(data) + else: + # Only print on multiple lines if it's "large" + output = json.dumps(data) + if len(output) > 16: + output = json.dumps(data, indent=4) + print() + print(output) + + +def deepwell_request(endpoint, method, data, id=0, color=False): + r = requests.post( + endpoint, + json={ + "jsonrpc": "2.0", + "method": method, + "params": data, + "id": id, + }, + ) + + if color: + green_start = "\x1b[32m" + red_start = "\x1b[31m" + color_end = "\x1b[0m" + else: + green_start = "" + red_start = "" + color_end = "" + + match r.json(): + case {"jsonrpc": "2.0", "id": id, "result": data}: + print(f"{green_start}OK {color_end}", end="") + print_data(data) + return 0 + case {"jsonrpc": "2.0", "id": id, "error": data}: + print(f"{red_start}ERR {color_end}", end="") + print_data(data) + return 1 + + +if __name__ == "__main__": + argparser = argparse.ArgumentParser( + "deepwell-request", + description="Helper script to run DEEPWELL JSONRPC requests", + ) + argparser.add_argument( + "-H", + "--host", + default="localhost", + ) + argparser.add_argument( + "-p", + "--port", + type=int, + default=2747, + ) + argparser.add_argument( + "-s", + "--https", + dest="scheme", + action="store_const", + const="https", + default="http", + ) + argparser.add_argument( + "-I", + "--id", + default=0, + ) + argparser.add_argument( + "-C", + "--color", + choices=["never", "auto", "always"], + default="auto", + ) + argparser.add_argument("method") + argparser.add_argument("data", nargs="?", type=json.loads, default="{}") + args = argparser.parse_args() + enable_color = color_settings(args.color) + + endpoint = f"{args.scheme}://{args.host}:{args.port}/jsonrpc" + exit_code = deepwell_request( + endpoint, + args.method, + args.data, + args.id, + color=enable_color, + ) + + sys.exit(exit_code) diff --git a/deepwell/scripts/upload.sh b/deepwell/scripts/upload.sh new file mode 100755 index 0000000000..375a6a80f7 --- /dev/null +++ b/deepwell/scripts/upload.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -eu + +# +# Helper script to upload files to a local S3 store for testing upload flows. +# + +if [[ $# -ne 2 ]]; then + echo >&2 "Usage: $0 " + exit 1 +fi + +# Allow either order of arguments, for convenience. +# If it starts with HTTP or HTTPS, we assume it's the presign URL. 
+if [[ $1 = http:* || $1 = https:* ]]; then + path="$2" + url="$1" +else + path="$1" + url="$2" +fi + +exec \ + curl \ + --connect-to 'files:9000:localhost:9000' \ + --upload-file "$path" \ + "$url" diff --git a/deepwell/src/api.rs b/deepwell/src/api.rs index 5f5fd75434..19606bf5fa 100644 --- a/deepwell/src/api.rs +++ b/deepwell/src/api.rs @@ -28,9 +28,9 @@ use crate::config::{Config, Secrets}; use crate::endpoints::{ - auth::*, category::*, domain::*, email::*, file::*, file_revision::*, link::*, - locale::*, message::*, misc::*, page::*, page_revision::*, parent::*, site::*, - site_member::*, text::*, user::*, user_bot::*, view::*, vote::*, + auth::*, blob::*, category::*, domain::*, email::*, file::*, file_revision::*, + link::*, locale::*, message::*, misc::*, page::*, page_revision::*, parent::*, + site::*, site_member::*, text::*, user::*, user_bot::*, view::*, vote::*, }; use crate::locales::Localizations; use crate::services::blob::MimeAnalyzer; @@ -174,6 +174,7 @@ async fn build_module(app_state: ServerState) -> anyhow::Result anyhow::Result. + */ + +use super::prelude::*; +use crate::services::blob::{ + BlobMetadata, CancelBlobUpload, GetBlobOutput, StartBlobUpload, StartBlobUploadOutput, +}; +use crate::services::Result; +use crate::web::Bytes; + +/// Temporary endpoint to get any blob by hash. +/// Primarily for user avatars, which have no other +/// way of getting the data at the moment. +pub async fn blob_get( + ctx: &ServiceContext<'_>, + params: Params<'static>, +) -> Result { + info!("Getting blob for S3 hash"); + let hash: Bytes = params.parse()?; + let data = BlobService::get(ctx, hash.as_ref()).await?; + + let BlobMetadata { + mime, + size, + created_at, + } = BlobService::get_metadata(ctx, hash.as_ref()).await?; + + Ok(GetBlobOutput { + data, + mime, + size, + created_at, + }) +} + +/// Cancel a started upload by removing the pending blob. +pub async fn blob_cancel( + ctx: &ServiceContext<'_>, + params: Params<'static>, +) -> Result<()> { + info!("Cancelling a pending blob upload"); + + let CancelBlobUpload { + user_id, + pending_blob_id, + } = params.parse()?; + + BlobService::cancel_upload(ctx, user_id, &pending_blob_id).await +} + +/// Starts a new upload by creating a pending blob. +pub async fn blob_upload( + ctx: &ServiceContext<'_>, + params: Params<'static>, +) -> Result { + info!("Creating new pending blob upload"); + let input: StartBlobUpload = params.parse()?; + BlobService::start_upload(ctx, input).await +} diff --git a/deepwell/src/endpoints/file.rs b/deepwell/src/endpoints/file.rs index 773f19865e..cbee032461 100644 --- a/deepwell/src/endpoints/file.rs +++ b/deepwell/src/endpoints/file.rs @@ -23,33 +23,13 @@ use crate::models::file::Model as FileModel; use crate::models::file_revision::Model as FileRevisionModel; use crate::services::blob::BlobService; use crate::services::file::{ - DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, GetBlobOutput, + CreateFile, CreateFileOutput, DeleteFile, DeleteFileOutput, EditFile, EditFileOutput, GetFileDetails, GetFileOutput, MoveFile, MoveFileOutput, RestoreFile, - RestoreFileOutput, UploadFile, UploadFileOutput, + RestoreFileOutput, }; use crate::services::Result; use crate::web::{Bytes, FileDetails}; -/// Temporary endpoint to get any blob by hash. -/// Primarily for user avatars, which have no other -/// way of getting the data at the moment. 
-pub async fn blob_get( - ctx: &ServiceContext<'_>, - params: Params<'static>, -) -> Result { - info!("Getting blob for S3 hash"); - let hash: Bytes = params.parse()?; - let data = BlobService::get(ctx, hash.as_ref()).await?; - let metadata = BlobService::get_metadata(ctx, hash.as_ref()).await?; - - let output = GetBlobOutput { - data, - mime: metadata.mime, - size: metadata.size, - }; - Ok(output) -} - pub async fn file_get( ctx: &ServiceContext<'_>, params: Params<'static>, @@ -79,21 +59,18 @@ pub async fn file_get( } } -pub async fn file_upload( +pub async fn file_create( ctx: &ServiceContext<'_>, params: Params<'static>, -) -> Result { - let input: UploadFile = params.parse()?; +) -> Result { + let input: CreateFile = params.parse()?; info!( - "Uploading file '{}' ({} bytes) to page ID {} in site ID {}", - input.name, - input.data.len(), - input.page_id, - input.site_id, + "Creating file on page ID {} in site ID {}", + input.page_id, input.site_id, ); - FileService::upload(ctx, input).await + FileService::create(ctx, input).await } pub async fn file_edit( diff --git a/deepwell/src/endpoints/misc.rs b/deepwell/src/endpoints/misc.rs index 6991a62fd1..adf2405e2d 100644 --- a/deepwell/src/endpoints/misc.rs +++ b/deepwell/src/endpoints/misc.rs @@ -21,6 +21,7 @@ use super::prelude::*; use crate::info; use sea_orm::{ConnectionTrait, DatabaseBackend, Statement}; +use serde_json::Value as JsonValue; use std::path::PathBuf; use wikidot_normalize::normalize; @@ -57,6 +58,16 @@ pub async fn ping( Ok("Pong!") } +pub async fn echo( + _ctx: &ServiceContext<'_>, + params: Params<'static>, +) -> Result { + // Just write out whatever JSON value they put in + let data: JsonValue = params.parse()?; + info!("Got echo request, sending back to caller"); + Ok(data) +} + /// Method which always returns an error. /// For testing. pub async fn yield_error( diff --git a/deepwell/src/endpoints/mod.rs b/deepwell/src/endpoints/mod.rs index cdaa7fbf09..aaaae95c65 100644 --- a/deepwell/src/endpoints/mod.rs +++ b/deepwell/src/endpoints/mod.rs @@ -43,6 +43,7 @@ mod prelude { } pub mod auth; +pub mod blob; pub mod category; pub mod domain; pub mod email; diff --git a/deepwell/src/models/blob_pending.rs b/deepwell/src/models/blob_pending.rs new file mode 100644 index 0000000000..8cd96e2abf --- /dev/null +++ b/deepwell/src/models/blob_pending.rs @@ -0,0 +1,41 @@ +//! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.0.1 + +use sea_orm::entity::prelude::*; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] +#[sea_orm(table_name = "blob_pending")] +pub struct Model { + #[sea_orm(primary_key, auto_increment = false, column_type = "Text")] + pub external_id: String, + pub created_by: i64, + pub created_at: TimeDateTimeWithTimeZone, + pub expires_at: TimeDateTimeWithTimeZone, + pub expected_length: i64, + #[sea_orm(column_type = "Text")] + pub s3_path: String, + #[sea_orm(column_type = "VarBinary(StringLen::None)")] + pub s3_hash: Option<Vec<u8>>, + #[sea_orm(column_type = "Text")] + pub presign_url: String, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::user::Entity", + from = "Column::CreatedBy", + to = "super::user::Column::UserId", + on_update = "NoAction", + on_delete = "NoAction" + )] + User, +} + +impl Related<super::user::Entity> for Entity { + fn to() -> RelationDef { + Relation::User.def() + } +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/deepwell/src/models/mod.rs b/deepwell/src/models/mod.rs index 238e1d33ff..3ba7524a08 100644 --- a/deepwell/src/models/mod.rs +++ b/deepwell/src/models/mod.rs @@ -3,6 +3,7 @@ pub mod prelude; pub mod alias; +pub mod blob_pending; pub mod file; pub mod file_revision; pub mod filter; diff --git a/deepwell/src/models/prelude.rs b/deepwell/src/models/prelude.rs index 073cd95cfe..169b0d0137 100644 --- a/deepwell/src/models/prelude.rs +++ b/deepwell/src/models/prelude.rs @@ -1,6 +1,7 @@ //! `SeaORM` Entity, @generated by sea-orm-codegen 1.0.1 pub use super::alias::Entity as Alias; +pub use super::blob_pending::Entity as BlobPending; pub use super::file::Entity as File; pub use super::file_revision::Entity as FileRevision; pub use super::filter::Entity as Filter; diff --git a/deepwell/src/models/user.rs b/deepwell/src/models/user.rs index 569ac36070..a985950552 100644 --- a/deepwell/src/models/user.rs +++ b/deepwell/src/models/user.rs @@ -50,6 +50,8 @@ pub struct Model { pub enum Relation { #[sea_orm(has_many = "super::alias::Entity")] Alias, + #[sea_orm(has_many = "super::blob_pending::Entity")] + BlobPending, #[sea_orm(has_many = "super::file_revision::Entity")] FileRevision, #[sea_orm(has_many = "super::message::Entity")] Message, @@ -76,6 +78,12 @@ impl Related<super::alias::Entity> for Entity { } } +impl Related<super::blob_pending::Entity> for Entity { + fn to() -> RelationDef { + Relation::BlobPending.def() + } +} + impl Related<super::file_revision::Entity> for Entity { fn to() -> RelationDef { Relation::FileRevision.def() diff --git a/deepwell/src/services/blob/mod.rs b/deepwell/src/services/blob/mod.rs index 1411669dc5..31986dee0c 100644 --- a/deepwell/src/services/blob/mod.rs +++ b/deepwell/src/services/blob/mod.rs @@ -27,6 +27,9 @@ #[allow(unused_imports)] mod prelude { pub use super::super::prelude::*; + pub use super::service::{ + EMPTY_BLOB_HASH, EMPTY_BLOB_MIME, EMPTY_BLOB_TIMESTAMP, PRESIGN_DIRECTORY, + }; pub use super::structs::*; pub use crate::hash::{blob_hash_to_hex, sha512_hash, BlobHash}; } @@ -36,5 +39,8 @@ mod service; mod structs; pub use self::mime::MimeAnalyzer; -pub use self::service::BlobService; +pub use self::service::{ + BlobService, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME, EMPTY_BLOB_TIMESTAMP, + PRESIGN_DIRECTORY, +}; pub use self::structs::*; diff --git a/deepwell/src/services/blob/service.rs b/deepwell/src/services/blob/service.rs index 2f478589c7..6212061c5b 100644 --- a/deepwell/src/services/blob/service.rs +++ 
b/deepwell/src/services/blob/service.rs @@ -18,21 +18,27 @@ * along with this program. If not, see . */ -// TEMP, until https://scuttle.atlassian.net/browse/WJ-1032 -#![allow(dead_code)] - use super::prelude::*; +use crate::models::blob_pending::{ + self, Entity as BlobPending, Model as BlobPendingModel, +}; +use crate::utils::assert_is_csprng; +use cuid2::cuid; +use rand::distributions::{Alphanumeric, DistString}; +use rand::thread_rng; use s3::request_trait::ResponseData; use s3::serde_types::HeadObjectResult; +use sea_orm::TransactionTrait; use std::str; +use std::sync::Arc; use time::format_description::well_known::Rfc2822; -use time::OffsetDateTime; +use time::{Duration, OffsetDateTime}; /// Hash for empty blobs. /// /// Even though it is not the SHA-512 hash, for simplicity we treat the hash /// value with all zeroes to be the blob address for the empty blob. -/// This empty file is not actually stored in S3 but instead is a "virtual file", +/// This empty blob is not actually stored in S3 but instead is a "virtual blob", /// considered to have always been present in `BlobService`. pub const EMPTY_BLOB_HASH: BlobHash = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -48,66 +54,337 @@ pub const EMPTY_BLOB_MIME: &str = "inode/x-empty; charset=binary"; /// Timestamp is 2019/01/18 at midnight, the date of the first Wikijump commit. pub const EMPTY_BLOB_TIMESTAMP: i64 = 1547769600; +/// The subdirectory in the S3 bucket where all pending uploads are kept. +pub const PRESIGN_DIRECTORY: &str = "uploads"; + #[derive(Debug)] pub struct BlobService; impl BlobService { - /// Creates a blob with this data, if it does not already exist. - pub async fn create>( + /// Creates an S3 presign URL to allow an end user to upload a blob. + /// This is the start to the upload process for any kind of file. + /// + /// # Returns + /// The generated presign URL, which can be uploaded to. 
+ pub async fn start_upload( + ctx: &ServiceContext<'_>, + StartBlobUpload { user_id, blob_size }: StartBlobUpload, + ) -> Result { + info!("Creating upload by {user_id} with promised length {blob_size}"); + let config = ctx.config(); + let txn = ctx.transaction(); + + // Convert expected length integer type, then check it + let blob_size = i64::try_from(blob_size).map_err(|_| Error::BlobTooBig)?; + if blob_size > config.maximum_blob_size { + error!( + "Blob proposed to upload is too big ({} > {})", + blob_size, config.maximum_blob_size, + ); + + return Err(Error::BlobTooBig); + } + + // Generate primary key and random S3 path + let pending_blob_id = cuid(); + let s3_path = { + let mut path = format!("{PRESIGN_DIRECTORY}/"); + + { + let mut rng = thread_rng(); + assert_is_csprng(&rng); + Alphanumeric.append_string( + &mut rng, + &mut path, + config.presigned_path_length, + ); + } + + path + }; + + info!("Creating presign upload URL for blob at path {s3_path} with primary key {pending_blob_id}"); + + // Create presign URL + let bucket = ctx.s3_bucket(); + let presign_url = + bucket.presign_put(&s3_path, config.presigned_expiry_secs, None)?; + + // Get timestamps + let created_at = now(); + let expires_at = created_at + .checked_add(Duration::seconds(i64::from(config.presigned_expiry_secs))) + .expect("getting expiration timestamp overflowed"); + + // Add pending blob entry + let model = blob_pending::ActiveModel { + external_id: Set(pending_blob_id), + expected_length: Set(blob_size), + s3_path: Set(s3_path), + presign_url: Set(presign_url), + created_by: Set(user_id), + created_at: Set(created_at), + expires_at: Set(expires_at), + ..Default::default() + }; + + let BlobPendingModel { + external_id: pending_blob_id, + presign_url, + .. + } = model.insert(txn).await?; + + debug!("New presign upload URL will last until {expires_at}"); + + Ok(StartBlobUploadOutput { + pending_blob_id, + presign_url, + expires_at, + }) + } + + async fn get_pending_blob_path( ctx: &ServiceContext<'_>, - data: B, - ) -> Result { - let data = data.as_ref(); - info!("Creating blob (length {})", data.len()); + user_id: i64, + pending_blob_id: &str, + ) -> Result { + let txn = ctx.transaction(); + let row = BlobPending::find_by_id(pending_blob_id).one(txn).await?; + let BlobPendingModel { + s3_path, + s3_hash, + created_by, + expected_length, + .. + } = match row { + Some(pending) => pending, + None => return Err(Error::BlobNotFound), + }; + + if user_id != created_by { + error!("User mismatch, user ID {user_id} is attempting to use blob uploaded by {created_by}"); + return Err(Error::BlobWrongUser); + } + + Ok(PendingBlob { + s3_path, + expected_length, + moved_hash: s3_hash, + }) + } + + pub async fn cancel_upload( + ctx: &ServiceContext<'_>, + user_id: i64, + pending_blob_id: &str, + ) -> Result<()> { + info!("Cancelling upload for blob for pending ID {pending_blob_id}"); + let txn = ctx.transaction(); + let PendingBlob { s3_path, .. } = + Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; + + BlobPending::delete_by_id(pending_blob_id).exec(txn).await?; + + if Self::head(ctx, &s3_path).await?.is_some() { + let bucket = ctx.s3_bucket(); + bucket.delete_object(&s3_path).await?; + } + + Ok(()) + } + + /// Helper function to do the actual "move" step of blob finalization. + /// This is where, after uploading to the presign URL, the S3 object is + /// then moved to its permanent location with a hashed name. 
+ /// + /// NOTE: Because S3 changes cannot be rolled back on error, we are + /// creating a separate transaction here so that `blob_pending` + /// changes are persistent even if the outer request fails. + async fn move_uploaded( + ctx: &ServiceContext<'_>, + pending_blob_id: &str, + s3_path: &str, + expected_length: usize, + ) -> Result { + let state = ctx.state(); + let db_state = Arc::clone(&state); + + // Produce temporary context in a new transaction + let txn = db_state.database.begin().await?; + let inner_ctx = ServiceContext::new(&state, &txn); + let result = Self::move_uploaded_inner( + &inner_ctx, + pending_blob_id, + s3_path, + expected_length, + ) + .await; + + // Commit separate transaction, recording a move (if it occurred) + txn.commit().await?; + result + } + + async fn move_uploaded_inner( + ctx: &ServiceContext<'_>, + pending_blob_id: &str, + s3_path: &str, + expected_length: usize, + ) -> Result { + let bucket = ctx.s3_bucket(); + let txn = ctx.transaction(); + + debug!("Download uploaded blob from S3 uploads to get metadata"); + let response = bucket.get_object(s3_path).await?; + let data: Vec = match response.status_code() { + 200 => response.into(), + 404 => { + error!("No blob uploaded at presign path {s3_path}"); + return Err(Error::BlobNotUploaded); + } + _ => { + error!("Unable to retrieve uploaded blob at {s3_path} from S3"); + let error = s3_error(&response, "finalizing uploaded blob")?; + return Err(error); + } + }; + + if expected_length != data.len() { + error!( + "Expected blob length of {} bytes, instead found {} uploaded. Deleting pending.", + expected_length, + data.len(), + ); + bucket.delete_object(&s3_path).await?; + return Err(Error::BlobSizeMismatch); + } // Special handling for empty blobs if data.is_empty() { debug!("File being created is empty, special case"); - return Ok(CreateBlobOutput { + return Ok(FinalizeBlobUploadOutput { hash: EMPTY_BLOB_HASH, mime: str!(EMPTY_BLOB_MIME), size: 0, + created: false, }); } - // Upload blob - let bucket = ctx.s3_bucket(); - let hash = sha512_hash(data); - let hex_hash = blob_hash_to_hex(&hash); + debug!("Updating blob metadata in database and S3"); // Convert size to correct integer type let size: i64 = data.len().try_into().expect("Buffer size exceeds i64"); - match Self::head(ctx, &hex_hash).await? { + let hash = sha512_hash(&data); + let hex_hash = blob_hash_to_hex(&hash); + + // If the blob exists, then just delete the uploaded one. + // + // If it doesn't, then we need to move it. However, within + // S3 we cannot "move" objects, we have to upload and delete the original. + + let result = match Self::head(ctx, &hex_hash).await? { // Blob exists, copy metadata and return that Some(result) => { debug!("Blob with hash {hex_hash} already exists"); - // Content-Type header should be passed in + // TODO: Should we ever update the mime type? + // In case of changing file formats, etc. 
+ + // Content-Type header should be returned let mime = result.content_type.ok_or(Error::S3Response)?; - Ok(CreateBlobOutput { hash, mime, size }) + Ok(FinalizeBlobUploadOutput { + hash, + mime, + size, + created: false, + }) } - // Blob doesn't exist, insert it + // Blob doesn't exist, "move" it None => { debug!("Blob with hash {hex_hash} to be created"); - // Determine MIME type for the new file + // Determine MIME type for the new blob let mime = ctx.mime().get_mime_type(data.to_vec()).await?; - // Put into S3 + // Upload S3 object to final destination let response = bucket - .put_object_with_content_type(&hex_hash, data, &mime) + .put_object_with_content_type(&hex_hash, &data, &mime) .await?; // We assume all unexpected statuses are errors, even if 1XX or 2XX match response.status_code() { - 200 => Ok(CreateBlobOutput { hash, mime, size }), - _ => s3_error(&response, "creating S3 blob"), + 200 => Ok(FinalizeBlobUploadOutput { + hash, + mime, + size, + created: true, + }), + _ => s3_error(&response, "creating final S3 blob")?, } } - } + }; + bucket.delete_object(&s3_path).await?; + + // Update pending blob with hash + let model = blob_pending::ActiveModel { + external_id: Set(str!(pending_blob_id)), + s3_hash: Set(Some(hash.to_vec())), + ..Default::default() + }; + model.update(txn).await?; + + // Return + result + } + + pub async fn finish_upload( + ctx: &ServiceContext<'_>, + user_id: i64, + pending_blob_id: &str, + ) -> Result { + info!("Finishing upload for blob for pending ID {pending_blob_id}"); + + let PendingBlob { + s3_path, + expected_length, + moved_hash, + } = Self::get_pending_blob_path(ctx, user_id, pending_blob_id).await?; + + let output = match moved_hash { + // Need to move from pending to main hash area + None => { + let expected_length = expected_length + .try_into() + .map_err(|_| Error::BlobSizeMismatch)?; + + Self::move_uploaded(ctx, pending_blob_id, &s3_path, expected_length) + .await? + } + + // Already moved + Some(hash_vec) => { + let BlobMetadata { mime, size, .. 
} = + Self::get_metadata(ctx, &hash_vec).await?; + + debug_assert_eq!(expected_length, size); + + let mut hash = [0; 64]; + hash.copy_from_slice(&hash_vec); + + FinalizeBlobUploadOutput { + hash, + mime, + size, + created: false, + } + } + }; + + // Return result based on blob status + Ok(output) } pub async fn get_optional( @@ -124,7 +401,6 @@ impl BlobService { let bucket = ctx.s3_bucket(); let hex_hash = blob_hash_to_hex(hash); let response = bucket.get_object(&hex_hash).await?; - match response.status_code() { 200 => Ok(Some(response.into())), 404 => Ok(None), @@ -183,6 +459,7 @@ impl BlobService { find_or_error!(Self::get_metadata_optional(ctx, hash), Blob) } + #[allow(dead_code)] // TEMP pub async fn exists(ctx: &ServiceContext<'_>, hash: &[u8]) -> Result { // Special handling for the empty blob if hash == EMPTY_BLOB_HASH { @@ -217,10 +494,10 @@ impl BlobService { async fn head( ctx: &ServiceContext<'_>, - hex_hash: &str, + path: &str, ) -> Result> { let bucket = ctx.s3_bucket(); - let (result, status) = bucket.head_object(hex_hash).await?; + let (result, status) = bucket.head_object(path).await?; match status { 200 | 204 => Ok(Some(result)), @@ -229,6 +506,7 @@ impl BlobService { } } + #[allow(dead_code)] // TEMP pub async fn hard_delete(ctx: &ServiceContext<'_>, hash: &[u8]) -> Result<()> { // Special handling for empty blobs // @@ -269,3 +547,10 @@ fn s3_error(response: &ResponseData, action: &str) -> Result { // TODO replace with S3 backend-specific error Err(Error::S3Response) } + +#[derive(Debug)] +struct PendingBlob { + s3_path: String, + expected_length: i64, + moved_hash: Option>, +} diff --git a/deepwell/src/services/blob/structs.rs b/deepwell/src/services/blob/structs.rs index aa018d9405..1ff7176ee9 100644 --- a/deepwell/src/services/blob/structs.rs +++ b/deepwell/src/services/blob/structs.rs @@ -21,17 +21,44 @@ use super::prelude::*; use time::OffsetDateTime; +#[derive(Deserialize, Debug, Clone)] +pub struct StartBlobUpload { + pub user_id: i64, + pub blob_size: u64, +} + +#[derive(Serialize, Debug, Clone)] +pub struct StartBlobUploadOutput { + pub pending_blob_id: String, + pub presign_url: String, + pub expires_at: OffsetDateTime, +} + +#[derive(Deserialize, Debug, Clone)] +pub struct CancelBlobUpload { + pub user_id: i64, + pub pending_blob_id: String, +} + #[derive(Debug)] -pub struct CreateBlobOutput { +pub struct FinalizeBlobUploadOutput { pub hash: BlobHash, pub mime: String, pub size: i64, + pub created: bool, } #[derive(Debug)] -#[allow(dead_code)] // TEMP pub struct BlobMetadata { pub mime: String, pub size: i64, pub created_at: OffsetDateTime, } + +#[derive(Serialize, Debug, Clone)] +pub struct GetBlobOutput { + pub data: Vec, + pub mime: String, + pub size: i64, + pub created_at: OffsetDateTime, +} diff --git a/deepwell/src/services/context.rs b/deepwell/src/services/context.rs index 05fa2250e0..cdb55d4a48 100644 --- a/deepwell/src/services/context.rs +++ b/deepwell/src/services/context.rs @@ -48,6 +48,11 @@ impl<'txn> ServiceContext<'txn> { } // Getters + #[inline] + pub fn state(&self) -> ServerState { + Arc::clone(&self.state) + } + #[inline] pub fn config(&self) -> &Config { &self.state.config diff --git a/deepwell/src/services/email/structs.rs b/deepwell/src/services/email/structs.rs index f73e275924..37eef2775a 100644 --- a/deepwell/src/services/email/structs.rs +++ b/deepwell/src/services/email/structs.rs @@ -54,7 +54,6 @@ impl Default for EmailValidationOutput { } #[derive(Serialize, Debug, Clone)] -#[serde(rename_all = "kebab-case")] pub enum 
EmailClassification { Normal, Disposable, diff --git a/deepwell/src/services/error.rs b/deepwell/src/services/error.rs index 01f6188bdc..1277f274bb 100644 --- a/deepwell/src/services/error.rs +++ b/deepwell/src/services/error.rs @@ -236,6 +236,18 @@ pub enum Error { #[error("Blob item does not exist")] BlobNotFound, + #[error("Blob not uploaded")] + BlobNotUploaded, + + #[error("Cannot use blob uploaded by different user")] + BlobWrongUser, + + #[error("Uploaded blob is too big for this operation")] + BlobTooBig, + + #[error("Uploaded blob does not match expected length")] + BlobSizeMismatch, + #[error("Text item does not exist")] TextNotFound, @@ -372,6 +384,10 @@ impl Error { Error::MessageBodyTooLong => 4019, Error::MessageNoRecipients => 4020, Error::MessageTooManyRecipients => 4021, + Error::BlobWrongUser => 4022, + Error::BlobTooBig => 4023, + Error::BlobNotUploaded => 4024, + Error::BlobSizeMismatch => 4025, // 4100 -- Localization Error::LocaleInvalid(_) => 4100, diff --git a/deepwell/src/services/file/service.rs b/deepwell/src/services/file/service.rs index 12eefe1838..efdbf52818 100644 --- a/deepwell/src/services/file/service.rs +++ b/deepwell/src/services/file/service.rs @@ -20,7 +20,11 @@ use super::prelude::*; use crate::models::file::{self, Entity as File, Model as FileModel}; -use crate::services::blob::CreateBlobOutput; +use crate::models::file_revision::{ + self, Entity as FileRevision, Model as FileRevisionModel, +}; +use crate::models::sea_orm_active_enums::FileRevisionType; +use crate::services::blob::{FinalizeBlobUploadOutput, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME}; use crate::services::file_revision::{ CreateFileRevision, CreateFileRevisionBody, CreateFirstFileRevision, CreateResurrectionFileRevision, CreateTombstoneFileRevision, FileBlob, @@ -32,31 +36,28 @@ use crate::services::{BlobService, FileRevisionService, FilterService}; pub struct FileService; impl FileService { - /// Uploads a file and tracks it as a separate file entity. + /// Creates a new file. + /// + /// Starts a file upload and tracks it as a distinct file entity. /// /// In the background, this stores the blob via content addressing, /// meaning that duplicates are not uploaded twice. 
- pub async fn upload( + pub async fn create( ctx: &ServiceContext<'_>, - UploadFile { + CreateFile { site_id, page_id, name, + uploaded_blob_id, revision_comments, user_id, - data, licensing, bypass_filter, - }: UploadFile, - ) -> Result { + }: CreateFile, + ) -> Result { + info!("Creating file with name '{}'", name); let txn = ctx.transaction(); - info!( - "Creating file with name '{}', content length {}", - name, - data.len(), - ); - // Ensure row consistency Self::check_conflicts(ctx, page_id, &name, "create").await?; @@ -65,9 +66,13 @@ impl FileService { Self::run_filter(ctx, site_id, Some(&name)).await?; } - // Upload to S3, get derived metadata - let CreateBlobOutput { hash, mime, size } = - BlobService::create(ctx, &data).await?; + // Finish blob upload + let FinalizeBlobUploadOutput { + hash: s3_hash, + mime: mime_hint, + size: size_hint, + created: blob_created, + } = BlobService::finish_upload(ctx, user_id, &uploaded_blob_id).await?; // Add new file let model = file::ActiveModel { @@ -78,28 +83,26 @@ impl FileService { }; let file = model.insert(txn).await?; - // Add new file revision - let revision_output = FileRevisionService::create_first( + FileRevisionService::create_first( ctx, CreateFirstFileRevision { - site_id, page_id, + site_id, file_id: file.file_id, user_id, name, - s3_hash: hash, - size_hint: size, - mime_hint: mime, + s3_hash, + size_hint, + mime_hint, + blob_created, licensing, - comments: revision_comments, + revision_comments, }, ) - .await?; - - Ok(revision_output) + .await } - /// Edits a file, including the ability to upload a new version. + /// Edits a file, creating a new revision. pub async fn edit( ctx: &ServiceContext<'_>, EditFile { @@ -120,8 +123,8 @@ impl FileService { let EditFileBody { name, - data, licensing, + uploaded_blob_id, } = body; // Verify name change @@ -136,23 +139,29 @@ impl FileService { } } - // Upload to S3, get derived metadata - let blob = match data { + // If a new file version was uploaded, then finalize. + // + // Get the blob struct for conditionally adding to + // the CreateFileRevisionBody. 
+ let blob = match uploaded_blob_id { ProvidedValue::Unset => ProvidedValue::Unset, - ProvidedValue::Set(bytes) => { - let CreateBlobOutput { hash, mime, size } = - BlobService::create(ctx, &bytes).await?; + ProvidedValue::Set(ref id) => { + let FinalizeBlobUploadOutput { + hash: s3_hash, + mime: mime_hint, + size: size_hint, + created: blob_created, + } = BlobService::finish_upload(ctx, user_id, id).await?; ProvidedValue::Set(FileBlob { - s3_hash: hash, - size_hint: size, - mime_hint: mime, + s3_hash, + mime_hint, + size_hint, + blob_created, }) } }; - // Make database changes - // Update file metadata let model = file::ActiveModel { file_id: Set(file_id), @@ -169,11 +178,11 @@ impl FileService { page_id, file_id, user_id, - comments: revision_comments, + revision_comments, body: CreateFileRevisionBody { name, - blob, licensing, + blob, ..Default::default() }, }, @@ -231,7 +240,7 @@ impl FileService { page_id: current_page_id, file_id, user_id, - comments: revision_comments, + revision_comments, body: CreateFileRevisionBody { page_id: ProvidedValue::Set(destination_page_id), ..Default::default() diff --git a/deepwell/src/services/file/structs.rs b/deepwell/src/services/file/structs.rs index cdb44ea368..8969487426 100644 --- a/deepwell/src/services/file/structs.rs +++ b/deepwell/src/services/file/structs.rs @@ -27,20 +27,20 @@ use serde_json::Value as JsonValue; use time::OffsetDateTime; #[derive(Deserialize, Debug, Clone)] -pub struct UploadFile { +pub struct CreateFile { pub site_id: i64, pub page_id: i64, pub name: String, + pub uploaded_blob_id: String, pub revision_comments: String, pub user_id: i64, - pub data: Bytes<'static>, pub licensing: JsonValue, // TODO #[serde(default)] pub bypass_filter: bool, } -pub type UploadFileOutput = CreateFirstFileRevisionOutput; +pub type CreateFileOutput = CreateFirstFileRevisionOutput; #[derive(Deserialize, Debug, Clone)] pub struct GetFile<'a> { @@ -79,13 +79,6 @@ pub struct GetFileOutput { pub hidden_fields: Vec, } -#[derive(Serialize, Debug, Clone)] -pub struct GetBlobOutput { - pub data: Vec, - pub mime: String, - pub size: i64, -} - #[derive(Deserialize, Debug, Clone)] pub struct EditFile { pub site_id: i64, @@ -105,8 +98,8 @@ pub struct EditFile { #[serde(default)] pub struct EditFileBody { pub name: ProvidedValue, - pub data: ProvidedValue>, pub licensing: ProvidedValue, + pub uploaded_blob_id: ProvidedValue, } pub type EditFileOutput = CreateFileRevisionOutput; diff --git a/deepwell/src/services/file_revision/service.rs b/deepwell/src/services/file_revision/service.rs index 3af73a3d6d..17b1697f6d 100644 --- a/deepwell/src/services/file_revision/service.rs +++ b/deepwell/src/services/file_revision/service.rs @@ -22,11 +22,14 @@ use super::prelude::*; use crate::models::file_revision::{ self, Entity as FileRevision, Model as FileRevisionModel, }; -use crate::services::{OutdateService, PageService}; -use crate::web::FetchDirection; +use crate::services::blob::{FinalizeBlobUploadOutput, EMPTY_BLOB_HASH, EMPTY_BLOB_MIME}; +use crate::services::{BlobService, OutdateService, PageService}; +use crate::web::{Bytes, FetchDirection}; use once_cell::sync::Lazy; use std::num::NonZeroI32; +pub const MAXIMUM_FILE_NAME_LENGTH: usize = 256; + /// The changes for the first revision. /// The first revision is always considered to have changed everything. 
/// @@ -58,7 +61,7 @@ impl FileRevisionService { mut page_id, file_id, user_id, - comments, + revision_comments, body, }: CreateFileRevision, previous: FileRevisionModel, @@ -68,6 +71,7 @@ impl FileRevisionService { // Fields to create in the revision let mut changes = Vec::new(); + let mut blob_created = ProvidedValue::Unset; let FileRevisionModel { mut name, mut s3_hash, @@ -105,6 +109,7 @@ impl FileRevisionService { s3_hash = new_blob.s3_hash.to_vec(); size_hint = new_blob.size_hint; mime_hint = new_blob.mime_hint; + blob_created = ProvidedValue::Set(new_blob.blob_created); } } @@ -118,6 +123,7 @@ impl FileRevisionService { // If nothing has changed, then don't create a new revision // Also don't rerender the page, this isn't an edit. if changes.is_empty() { + debug!("No changes in file, performing no action"); return Ok(None); } @@ -127,8 +133,12 @@ impl FileRevisionService { return Err(Error::FileNameEmpty); } - if name.len() >= 256 { - error!("File name of invalid length: {}", name.len()); + if name.len() >= MAXIMUM_FILE_NAME_LENGTH { + error!( + "File name of invalid length: {} > {}", + name.len(), + MAXIMUM_FILE_NAME_LENGTH, + ); return Err(Error::FileNameTooLong); } @@ -146,7 +156,7 @@ impl FileRevisionService { // Insert the new revision into the table let model = file_revision::ActiveModel { revision_type: Set(FileRevisionType::Update), - revision_number: Set(0), + revision_number: Set(revision_number), file_id: Set(file_id), page_id: Set(page_id), site_id: Set(site_id), @@ -157,7 +167,7 @@ impl FileRevisionService { mime_hint: Set(mime_hint), licensing: Set(licensing), changes: Set(changes), - comments: Set(comments), + comments: Set(revision_comments), hidden: Set(vec![]), ..Default::default() }; @@ -166,15 +176,13 @@ impl FileRevisionService { Ok(Some(CreateFileRevisionOutput { file_revision_id: revision_id, file_revision_number: revision_number, + blob_created, })) } - /// Creates the first revision for a newly-uploaded file. + /// Creates the first revision for an already-uploaded file. /// /// See `RevisionService::create_first()`. - /// - /// # Panics - /// If the given previous revision is for a different file or page, this method will panic. 
pub async fn create_first( ctx: &ServiceContext<'_>, CreateFirstFileRevision { @@ -186,8 +194,9 @@ impl FileRevisionService { s3_hash, size_hint, mime_hint, + blob_created, licensing, - comments, + revision_comments, }: CreateFirstFileRevision, ) -> Result { let txn = ctx.transaction(); @@ -211,7 +220,7 @@ impl FileRevisionService { size_hint: Set(size_hint), licensing: Set(licensing), changes: Set(ALL_CHANGES.clone()), - comments: Set(comments), + comments: Set(revision_comments), hidden: Set(vec![]), ..Default::default() }; @@ -220,6 +229,7 @@ impl FileRevisionService { Ok(CreateFirstFileRevisionOutput { file_id, file_revision_id: revision_id, + blob_created, }) } @@ -282,6 +292,7 @@ impl FileRevisionService { Ok(CreateFileRevisionOutput { file_revision_id: revision_id, file_revision_number: revision_number, + blob_created: ProvidedValue::Unset, }) } @@ -369,6 +380,7 @@ impl FileRevisionService { Ok(CreateFileRevisionOutput { file_revision_id: revision_id, file_revision_number: revision_number, + blob_created: ProvidedValue::Unset, }) } diff --git a/deepwell/src/services/file_revision/structs.rs b/deepwell/src/services/file_revision/structs.rs index 3044bb52a8..c56b6f382e 100644 --- a/deepwell/src/services/file_revision/structs.rs +++ b/deepwell/src/services/file_revision/structs.rs @@ -21,7 +21,7 @@ use super::prelude::*; use crate::hash::BlobHash; use crate::services::page_revision::PageRevisionCountOutput; -use crate::web::FetchDirection; +use crate::web::{Bytes, FetchDirection}; #[derive(Debug, Clone)] pub struct CreateFileRevision { @@ -29,7 +29,7 @@ pub struct CreateFileRevision { pub page_id: i64, pub file_id: i64, pub user_id: i64, - pub comments: String, + pub revision_comments: String, pub body: CreateFileRevisionBody, } @@ -46,12 +46,16 @@ pub struct FileBlob { pub s3_hash: BlobHash, pub size_hint: i64, pub mime_hint: String, + pub blob_created: bool, } #[derive(Serialize, Debug, Clone, Default)] pub struct CreateFileRevisionOutput { pub file_revision_id: i64, pub file_revision_number: i32, + + #[serde(default, skip_serializing_if = "ProvidedValue::is_unset")] + pub blob_created: ProvidedValue, } #[derive(Debug, Clone)] @@ -64,14 +68,16 @@ pub struct CreateFirstFileRevision { pub s3_hash: BlobHash, pub size_hint: i64, pub mime_hint: String, + pub blob_created: bool, pub licensing: serde_json::Value, - pub comments: String, + pub revision_comments: String, } #[derive(Serialize, Debug, Clone, Default)] pub struct CreateFirstFileRevisionOutput { pub file_id: i64, pub file_revision_id: i64, + pub blob_created: bool, } #[derive(Deserialize, Debug, Clone)] diff --git a/deepwell/src/services/import/service.rs b/deepwell/src/services/import/service.rs index a0827f7ac4..22835d0c71 100644 --- a/deepwell/src/services/import/service.rs +++ b/deepwell/src/services/import/service.rs @@ -68,8 +68,13 @@ impl ImportService { let avatar_s3_hash = match avatar { None => None, Some(bytes) => { + // FIXME import - uploading avatars + /* let output = BlobService::create(ctx, &bytes).await?; Some(output.hash.to_vec()) + */ + let _ = bytes; + todo!() } }; diff --git a/deepwell/src/services/job/structs.rs b/deepwell/src/services/job/structs.rs index 8f483e98ac..c21917108f 100644 --- a/deepwell/src/services/job/structs.rs +++ b/deepwell/src/services/job/structs.rs @@ -28,6 +28,7 @@ pub enum Job { }, PruneSessions, PruneText, + // TODO add job for pruning incomplete uploads (pending_blob table and corresponding columns for foreign keys) NameChangeRefill, LiftExpiredPunishments, } diff --git 
a/deepwell/src/services/page_revision/service.rs b/deepwell/src/services/page_revision/service.rs index 5c8b98f3cb..72a9241303 100644 --- a/deepwell/src/services/page_revision/service.rs +++ b/deepwell/src/services/page_revision/service.rs @@ -181,6 +181,7 @@ impl PageRevisionService { // If nothing has changed, then don't create a new revision if changes.is_empty() { + debug!("No changes in edit, only rerendering the page"); Self::rerender(ctx, site_id, page_id, 0).await?; return Ok(None); } diff --git a/deepwell/src/services/user/service.rs b/deepwell/src/services/user/service.rs index fb5bff0b2a..e0c8b54452 100644 --- a/deepwell/src/services/user/service.rs +++ b/deepwell/src/services/user/service.rs @@ -22,7 +22,7 @@ use super::prelude::*; use crate::models::sea_orm_active_enums::{AliasType, UserType}; use crate::models::user::{self, Entity as User, Model as UserModel}; use crate::services::alias::CreateAlias; -use crate::services::blob::{BlobService, CreateBlobOutput}; +use crate::services::blob::{BlobService, FinalizeBlobUploadOutput}; use crate::services::email::{EmailClassification, EmailService}; use crate::services::filter::{FilterClass, FilterType}; use crate::services::{AliasService, FilterService, PasswordService}; @@ -421,12 +421,22 @@ impl UserService { model.user_page = Set(user_page); } - if let ProvidedValue::Set(avatar) = input.avatar { - let s3_hash = match avatar { + if let ProvidedValue::Set(uploaded_blob_id) = input.avatar_uploaded_blob_id { + let s3_hash = match uploaded_blob_id { None => None, - Some(blob) => { - let CreateBlobOutput { hash, .. } = - BlobService::create(ctx, &blob).await?; + Some(uploaded_blob_id) => { + let config = ctx.config(); + let FinalizeBlobUploadOutput { hash, size, .. } = + BlobService::finish_upload(ctx, user.user_id, &uploaded_blob_id) + .await?; + + if size > config.maximum_avatar_size { + error!( + "Uploaded avatar size is too big {} > {}", + size, config.maximum_avatar_size, + ); + return Err(Error::BlobTooBig); + } Some(hash.to_vec()) } diff --git a/deepwell/src/services/user/structs.rs b/deepwell/src/services/user/structs.rs index 2bb1c9e1e2..2971721837 100644 --- a/deepwell/src/services/user/structs.rs +++ b/deepwell/src/services/user/structs.rs @@ -73,7 +73,7 @@ pub struct UpdateUserBody { pub email_verified: ProvidedValue, pub password: ProvidedValue, pub locales: ProvidedValue>, - pub avatar: ProvidedValue>>, + pub avatar_uploaded_blob_id: ProvidedValue>, pub real_name: ProvidedValue>, pub gender: ProvidedValue>, pub birthday: ProvidedValue>, diff --git a/deepwell/src/web/provided_value.rs b/deepwell/src/web/provided_value.rs index 8b51924063..f0ab7350dc 100644 --- a/deepwell/src/web/provided_value.rs +++ b/deepwell/src/web/provided_value.rs @@ -25,6 +25,19 @@ /// it to null (`None`). /// /// The `Unset` variant can only be constructed if the field is absent. +/// +/// ## Notes +/// When serializing or deserializing a field using this enum, you must +/// add the following: +/// ```unchecked +/// #[serde(default, skip_serializing_if = "ProvidedValue::is_unset")] +/// ``` +/// +/// (The `skip_serializing_if` attribute is optional if this is a +/// deserialize-only structure). +/// +/// Otherwise you will get an error mentioning that this enum is impossible +/// to serialize. 
#[derive(Serialize, Deserialize, Debug, Default, Clone, Hash, PartialEq, Eq)] #[serde(untagged)] pub enum ProvidedValue<T> { @@ -36,20 +49,30 @@ pub enum ProvidedValue<T> { } impl<T> ProvidedValue<T> { - #[inline] pub fn to_option(&self) -> Option<&T> { match self { ProvidedValue::Set(ref value) => Some(value), ProvidedValue::Unset => None, } } + + pub fn is_set(&self) -> bool { + match self { + ProvidedValue::Set(_) => true, + ProvidedValue::Unset => false, + } + } + + #[inline] + pub fn is_unset(&self) -> bool { + !self.is_set() + } } impl<T> ProvidedValue<T> where T: Into<sea_orm::Value>, { - #[inline] pub fn into_active_value(self) -> sea_orm::ActiveValue<T> { match self { ProvidedValue::Set(value) => sea_orm::ActiveValue::Set(value), @@ -59,7 +82,6 @@ where } impl<T> From<ProvidedValue<T>> for Option<T> { - #[inline] fn from(value: ProvidedValue<T>) -> Option<T> { match value { ProvidedValue::Set(value) => Some(value), @@ -69,16 +91,18 @@ impl<T> From<ProvidedValue<T>> for Option<T> { } #[test] -fn provided_value_deserialize() { +fn serde() { use serde_json::json; - #[derive(Deserialize, Debug)] + #[derive(Serialize, Deserialize, Debug)] struct Object { - #[serde(default)] + #[serde(default, skip_serializing_if = "ProvidedValue::is_unset")] field: ProvidedValue<Option<String>>, } - macro_rules! check { + // Deserialization + + macro_rules! check_deser { ($value:expr, $expected:expr $(,)?) => {{ let object: Object = serde_json::from_value($value).expect("Unable to deserialize JSON"); @@ -90,10 +114,31 @@ fn provided_value_deserialize() { }}; } - check!(json!({}), ProvidedValue::Unset); - check!(json!({ "field": null }), ProvidedValue::Set(None)); - check!( - json!({"field": "value"}), - ProvidedValue::Set(Some(str!("value"))), + check_deser!(json!({}), ProvidedValue::Unset); + check_deser!(json!({ "field": null }), ProvidedValue::Set(None)); + check_deser!( + json!({"field": "apple"}), + ProvidedValue::Set(Some(str!("apple"))), + ); + + // Serialization + + macro_rules! check_ser { + ($field:expr, $expected:expr $(,)?) 
=> {{ + let object = Object { field: $field }; + let json = serde_json::to_string(&object).expect("Unable to serialize JSON"); + + assert_eq!( + json, $expected, + "Actual generated JSON doesn't match expected", + ); + }}; + } + + check_ser!(ProvidedValue::Unset, "{}"); + check_ser!(ProvidedValue::Set(None), r#"{"field":null}"#); + check_ser!( + ProvidedValue::Set(Some(str!("banana"))), + r#"{"field":"banana"}"#, ); } diff --git a/install/files/dev/deepwell.toml b/install/files/dev/deepwell.toml index 2a0ccb8fb8..b842b4c716 100644 --- a/install/files/dev/deepwell.toml +++ b/install/files/dev/deepwell.toml @@ -67,6 +67,12 @@ maximum-name-changes = 3 minimum-name-bytes = 3 refill-name-change-days = 90 +[file] +presigned-path-length = 32 +presigned-expiration-minutes = 10 +maximum-blob-size-kb = 1048576 # 1 GiB +maximum-avatar-size-kb = 100 # 100 KiB + [message] maximum-subject-bytes = 128 maximum-body-bytes = 200000 diff --git a/install/files/local/deepwell.toml b/install/files/local/deepwell.toml index cd49015359..94d45d9dd7 100644 --- a/install/files/local/deepwell.toml +++ b/install/files/local/deepwell.toml @@ -67,6 +67,12 @@ maximum-name-changes = 3 minimum-name-bytes = 3 refill-name-change-days = 90 +[file] +presigned-path-length = 32 +presigned-expiration-minutes = 10 +maximum-blob-size-kb = 1048576 # 1 GiB +maximum-avatar-size-kb = 4096 # 4 MiB + [message] maximum-subject-bytes = 128 maximum-body-bytes = 200000 diff --git a/install/files/prod/deepwell.toml b/install/files/prod/deepwell.toml index 386e863321..8032e9ed06 100644 --- a/install/files/prod/deepwell.toml +++ b/install/files/prod/deepwell.toml @@ -67,6 +67,12 @@ maximum-name-changes = 3 minimum-name-bytes = 3 refill-name-change-days = 90 +[file] +presigned-path-length = 32 +presigned-expiration-minutes = 5 +maximum-blob-size-kb = 1048576 # 1 GiB +maximum-avatar-size-kb = 100 # 100 KiB + [message] maximum-subject-bytes = 128 maximum-body-bytes = 200000
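Taken together, the changes above replace direct blob uploads with a two-step flow: the client first asks DEEPWELL to start an upload and receives a presigned S3 URL, PUTs the data to that URL, then finalizes by referencing the pending blob from a file operation. The sketch below walks through that flow against a local development instance using the helper scripts added in this patch. It assumes the JSONRPC method names match the endpoint function names (`blob_upload`, `file_create`); the IDs, file name, and `licensing` value are illustrative placeholders, not values taken from the patch.

```sh
# 1. Start an upload: DEEPWELL records a pending blob and returns a presigned
#    S3 URL (fields mirror StartBlobUpload / StartBlobUploadOutput).
$ scripts/request.py blob_upload '{"user_id": 1, "blob_size": 1024}'
# prints: OK {"pending_blob_id": "<pending-blob-id>", "presign_url": "<presigned-url>", "expires_at": "..."}

# 2. PUT the data to the presigned URL. Its host is `files`, so redirect curl
#    to localhost in local development, or use the helper script:
$ scripts/upload.sh example.png '<presigned-url>'

# 3. Finalize by creating the file, passing the pending blob ID back. DEEPWELL
#    hashes the uploaded object, moves it to its content-addressed location,
#    and creates the file plus its first revision (fields mirror CreateFile).
$ scripts/request.py file_create '{"site_id": 1, "page_id": 1, "name": "example.png", "uploaded_blob_id": "<pending-blob-id>", "revision_comments": "Initial upload", "user_id": 1, "licensing": {}}'
```

If the upload is abandoned instead, the `blob_cancel` endpoint (taking `user_id` and `pending_blob_id`) deletes the pending row and removes any object already sitting at the presign path.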