Skip to content

Commit

Permalink
Introduce file upload api (#5977)
Browse files Browse the repository at this point in the history
* Checkout file upload endpoint related code

* Limit file upload to 1.

* Tmp

* Implement FileResource

* ✨

* Fix google-cloud/storage issue

* Add unique sId index

* Fix migration

* 👕

* Make file upload rate limit more aggressive

* Enforce upload window

* Ensure the uploaded file matches the requirements

* ✂️

* Use formidable to adhere to file requirements

* s/fetchByInternalId/fetchByModelId

* Move try/catch + move markAsFailed for preprocessing

* s/fileRes/file

* Remove types for FileId

* 🙈

* Implement use cases for pre-processing
  • Loading branch information
flvndvd authored and albandum committed Jul 2, 2024
1 parent 44edd38 commit 9e7dad8
Show file tree
Hide file tree
Showing 27 changed files with 1,702 additions and 91 deletions.
2 changes: 2 additions & 0 deletions front/admin/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ import {
WorkspaceHasDomain,
} from "@app/lib/models/workspace";
import { ContentFragmentModel } from "@app/lib/resources/storage/models/content_fragment";
import { FileModel } from "@app/lib/resources/storage/models/files";
import { KeyModel } from "@app/lib/resources/storage/models/keys";
// Labs - Can be removed at all times if a solution is dropped
import {
Expand Down Expand Up @@ -93,6 +94,7 @@ async function main() {
await Provider.sync({ alter: true });
await Clone.sync({ alter: true });
await KeyModel.sync({ alter: true });
await FileModel.sync({ alter: true });
await DustAppSecret.sync({ alter: true });
await DataSource.sync({ alter: true });
await RunModel.sync({ alter: true });
Expand Down
2 changes: 1 addition & 1 deletion front/lib/api/assistant/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -697,7 +697,7 @@ async function fetchWorkspaceAgentConfigurationsForView(

let template: TemplateResource | null = null;
if (agent.templateId) {
template = await TemplateResource.fetchById(agent.templateId);
template = await TemplateResource.fetchByModelId(agent.templateId);
}

const agentConfigurationType: AgentConfigurationType = {
Expand Down
94 changes: 94 additions & 0 deletions front/lib/api/files/preprocessing.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import type {
FileUseCase,
Result,
SupportedFileContentType,
} from "@dust-tt/types";
import { Err, Ok } from "@dust-tt/types";
import sharp from "sharp";
import { pipeline } from "stream/promises";

import type { Authenticator } from "@app/lib/auth";
import type { FileResource } from "@app/lib/resources/file_resource";
import logger from "@app/logger/logger";

/**
 * Streams the "original" version of a file through an image resizer and
 * writes the result as the "processed" version.
 *
 * @param auth - Authenticator forwarded to the file's stream accessors; its
 *   workspace sId is attached to the error log on failure.
 * @param file - File resource whose "original" version is read and whose
 *   "processed" version is written.
 * @returns `Ok(undefined)` once the stream pipeline completes, otherwise an
 *   `Err` wrapping the failure (always an `Error` instance).
 */
const resizeAndUploadToFileStorage: PreprocessingFunction = async (
  auth: Authenticator,
  file: FileResource
) => {
  const readStream = file.getReadStream(auth, "original");

  // Resize the image, preserving the aspect ratio. Longest side is at most 768px.
  const resizedImageStream = sharp().resize(768, 768, {
    fit: sharp.fit.inside, // Keep the image within a 768x768 bounding box.
    withoutEnlargement: true, // Avoid upscaling if image is smaller than 768px.
  });

  const writeStream = file.getWriteStream(auth, "processed");

  try {
    await pipeline(readStream, resizedImageStream, writeStream);

    return new Ok(undefined);
  } catch (err) {
    logger.error(
      {
        fileId: file.sId,
        workspaceId: auth.workspace()?.sId,
        error: err,
      },
      "Failed to resize image."
    );

    // A caught value is not guaranteed to be an Error; narrow it instead of
    // asserting so callers always receive a real Error instance.
    return new Err(err instanceof Error ? err : new Error(String(err)));
  }
};

// Preprocessing for file upload.

/**
 * A preprocessing step for an uploaded file. Resolves to `Ok(undefined)` on
 * success or to an `Err` carrying the failure.
 */
type PreprocessingFunction = (
  auth: Authenticator,
  file: FileResource
) => Promise<Result<undefined, Error>>;

/** Preprocessing step (if any) to run for each file use case. */
type PreprocessingPerUseCase = Record<
  FileUseCase,
  PreprocessingFunction | undefined
>;

/** Per-use-case preprocessing table (if any) for each supported content type. */
type PreprocessingPerContentType = Record<
  SupportedFileContentType,
  PreprocessingPerUseCase | undefined
>;

// JPEG and PNG uploads share the same per-use-case preprocessing.
const imagePreprocessingPerUseCase: PreprocessingPerUseCase = {
  conversation: resizeAndUploadToFileStorage,
};

// Content types without an entry here have no preprocessing registered.
const processingPerContentType: Partial<PreprocessingPerContentType> = {
  "image/jpeg": imagePreprocessingPerUseCase,
  "image/png": imagePreprocessingPerUseCase,
};

/**
 * Runs the preprocessing step registered for the file's content type and use
 * case, if there is one, and updates the file status accordingly.
 *
 * @param auth - Authenticator forwarded to the preprocessing step.
 * @param file - File resource to preprocess; `contentType` and `useCase`
 *   select the step.
 * @returns The step's `Err` (after marking the file as failed) when the step
 *   fails; otherwise `Ok(undefined)` after marking the file as ready.
 */
export async function maybeApplyPreProcessing(
  auth: Authenticator,
  file: FileResource
): Promise<Result<undefined, Error>> {
  // Resolves to undefined when either the content type or the use case has
  // no registered step.
  const preprocess = processingPerContentType[file.contentType]?.[file.useCase];

  if (preprocess) {
    const outcome = await preprocess(auth, file);
    if (outcome.isErr()) {
      await file.markAsFailed();

      return outcome;
    }
  }

  await file.markAsReady();

  return new Ok(undefined);
}
2 changes: 2 additions & 0 deletions front/lib/file.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// These are the front-end helpers.

function isMarkdownFile(file: File): boolean {
if (file.type === "") {
const fileExtension = file.name.split(".").at(-1)?.toLowerCase();
Expand Down
6 changes: 3 additions & 3 deletions front/lib/file_storage/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class FileStorage {
fileStream,
gcsFile.createWriteStream({
metadata: {
contentType: file.mimetype,
contentType: file.mimetype ?? undefined,
},
})
);
Expand All @@ -44,7 +44,7 @@ class FileStorage {
contentType,
filePath,
}: {
content: string;
content: string | Buffer;
contentType: string;
filePath: string;
}) {
Expand All @@ -67,7 +67,7 @@ class FileStorage {
return content.toString();
}

async getFileContentType(filename: string): Promise<string | null> {
async getFileContentType(filename: string): Promise<string | undefined> {
const gcsFile = this.file(filename);

const [metadata] = await gcsFile.getMetadata();
Expand Down
11 changes: 8 additions & 3 deletions front/lib/resources/base_resource.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import type { ModelId, Result } from "@dust-tt/types";
import type { Attributes, Model, ModelStatic, Transaction } from "sequelize";

import type { Authenticator } from "@app/lib/auth";

interface BaseResourceConstructor<T extends BaseResource<M>, M extends Model> {
new (model: ModelStatic<M>, blob: Attributes<M>): T;
}
Expand All @@ -9,7 +11,7 @@ interface BaseResourceConstructor<T extends BaseResource<M>, M extends Model> {
* BaseResource serves as a foundational class for resource management.
* It encapsulates common CRUD operations for Sequelize models, ensuring a uniform interface
* across different resources. Each instance represents a specific database row, identified by `id`.
* - `fetchById`: Static method to retrieve an instance based on its ID, ensuring type safety and
* - `fetchByModelId`: Static method to retrieve an instance based on its ID, ensuring type safety and
* the correct model instantiation.
* - `delete`: Instance method to delete the current resource from the database.
* - `update`: Instance method to update the current resource with new values.
Expand All @@ -28,7 +30,7 @@ export abstract class BaseResource<M extends Model> {
this.id = blob.id;
}

static async fetchById<T extends BaseResource<M>, M extends Model>(
static async fetchByModelId<T extends BaseResource<M>, M extends Model>(
this: BaseResourceConstructor<T, M> & {
model: ModelStatic<M>;
},
Expand All @@ -45,5 +47,8 @@ export abstract class BaseResource<M extends Model> {
return new this(this.model, blob.get());
}

abstract delete(transaction?: Transaction): Promise<Result<undefined, Error>>;
abstract delete(
auth: Authenticator,
transaction?: Transaction
): Promise<Result<undefined, Error>>;
}
2 changes: 2 additions & 0 deletions front/lib/resources/content_fragment_resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ export class ContentFragmentResource extends BaseResource<ContentFragmentModel>
* @deprecated use the destroy method.
*/
delete(
// eslint-disable-next-line @typescript-eslint/no-unused-vars
auth: Authenticator,
// eslint-disable-next-line @typescript-eslint/no-unused-vars
transaction?: Transaction
): Promise<Result<undefined, Error>> {
Expand Down
Loading

0 comments on commit 9e7dad8

Please sign in to comment.