diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml
index f6092c86337d8..d681dc66276dd 100644
--- a/.github/workflows/style.yml
+++ b/.github/workflows/style.yml
@@ -45,6 +45,10 @@ jobs:
         if: steps.changed-files.outputs.any_changed == 'true'
         run: poetry run -C api dotenv-linter ./api/.env.example ./web/.env.example

+      - name: Ruff formatter check
+        if: steps.changed-files.outputs.any_changed == 'true'
+        run: poetry run -C api ruff format --check ./api
+
      - name: Lint hints
        if: failure()
        run: echo "Please run 'dev/reformat' to fix the fixable linting errors."
diff --git a/api/.env.example b/api/.env.example
index 775149f8fd388..f81675fd53810 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -267,4 +267,13 @@ APP_MAX_ACTIVE_REQUESTS=0

 # Celery beat configuration
-CELERY_BEAT_SCHEDULER_TIME=1
\ No newline at end of file
+CELERY_BEAT_SCHEDULER_TIME=1
+
+# Position configuration
+POSITION_TOOL_PINS=
+POSITION_TOOL_INCLUDES=
+POSITION_TOOL_EXCLUDES=
+
+POSITION_PROVIDER_PINS=
+POSITION_PROVIDER_INCLUDES=
+POSITION_PROVIDER_EXCLUDES=
diff --git a/api/app.py b/api/app.py
index 50441cb81da1f..ad219ca0d6745 100644
--- a/api/app.py
+++ b/api/app.py
@@ -1,6 +1,6 @@
 import os

-if os.environ.get("DEBUG", "false").lower() != 'true':
+if os.environ.get("DEBUG", "false").lower() != "true":
     from gevent import monkey

     monkey.patch_all()
@@ -57,7 +57,7 @@
 if os.name == "nt":
     os.system('tzutil /s "UTC"')
 else:
-    os.environ['TZ'] = 'UTC'
+    os.environ["TZ"] = "UTC"
     time.tzset()
@@ -70,13 +70,14 @@ class DifyApp(Flask):
 # -------------

-config_type = os.getenv('EDITION', default='SELF_HOSTED')  # ce edition first
+config_type = os.getenv("EDITION", default="SELF_HOSTED")  # ce edition first

 # ----------------------------
 # Application Factory Function
 # ----------------------------

+
 def create_flask_app_with_configs() -> Flask:
     """
     create a raw flask app
@@ -92,7 +93,7 @@ def create_flask_app_with_configs() -> Flask:
         elif isinstance(value, int | float | bool):
             os.environ[key] = str(value)
         elif value is None:
-            os.environ[key] = ''
+            os.environ[key] = ""

     return dify_app
@@ -100,10 +101,10 @@ def create_flask_app_with_configs() -> Flask:
 def create_app() -> Flask:
     app = create_flask_app_with_configs()

-    app.secret_key = app.config['SECRET_KEY']
+    app.secret_key = app.config["SECRET_KEY"]

     log_handlers = None
-    log_file = app.config.get('LOG_FILE')
+    log_file = app.config.get("LOG_FILE")
     if log_file:
         log_dir = os.path.dirname(log_file)
         os.makedirs(log_dir, exist_ok=True)
@@ -111,23 +112,24 @@ def create_app() -> Flask:
             RotatingFileHandler(
                 filename=log_file,
                 maxBytes=1024 * 1024 * 1024,
-                backupCount=5
+                backupCount=5,
             ),
-            logging.StreamHandler(sys.stdout)
+            logging.StreamHandler(sys.stdout),
         ]

     logging.basicConfig(
-        level=app.config.get('LOG_LEVEL'),
-        format=app.config.get('LOG_FORMAT'),
-        datefmt=app.config.get('LOG_DATEFORMAT'),
+        level=app.config.get("LOG_LEVEL"),
+        format=app.config.get("LOG_FORMAT"),
+        datefmt=app.config.get("LOG_DATEFORMAT"),
         handlers=log_handlers,
-        force=True
+        force=True,
     )
-    log_tz = app.config.get('LOG_TZ')
+    log_tz = app.config.get("LOG_TZ")
     if log_tz:
         from datetime import datetime

         import pytz
+
         timezone = pytz.timezone(log_tz)

         def time_converter(seconds):
@@ -162,24 +164,24 @@ def initialize_extensions(app):
 @login_manager.request_loader
 def load_user_from_request(request_from_flask_login):
     """Load user based on the request."""
-    if request.blueprint not in ['console', 'inner_api']:
+    if request.blueprint not in ["console", "inner_api"]:
         return None
     # Check if the user_id contains a dot, indicating the old format
-    auth_header = request.headers.get('Authorization', '')
+    auth_header = request.headers.get("Authorization", "")
     if not auth_header:
-        auth_token = request.args.get('_token')
+        auth_token = request.args.get("_token")
         if not auth_token:
-            raise Unauthorized('Invalid Authorization token.')
+            raise Unauthorized("Invalid Authorization token.")
     else:
-        if ' ' not in auth_header:
-            raise Unauthorized('Invalid Authorization header format. Expected \'Bearer \' format.')
+        if " " not in auth_header:
+            raise Unauthorized("Invalid Authorization header format. Expected 'Bearer ' format.")
         auth_scheme, auth_token = auth_header.split(None, 1)
         auth_scheme = auth_scheme.lower()
-        if auth_scheme != 'bearer':
-            raise Unauthorized('Invalid Authorization header format. Expected \'Bearer \' format.')
+        if auth_scheme != "bearer":
+            raise Unauthorized("Invalid Authorization header format. Expected 'Bearer ' format.")

     decoded = PassportService().verify(auth_token)
-    user_id = decoded.get('user_id')
+    user_id = decoded.get("user_id")

     account = AccountService.load_logged_in_account(account_id=user_id, token=auth_token)
     if account:
@@ -190,10 +192,11 @@ def load_user_from_request(request_from_flask_login):
 @login_manager.unauthorized_handler
 def unauthorized_handler():
     """Handle unauthorized requests."""
-    return Response(json.dumps({
-        'code': 'unauthorized',
-        'message': "Unauthorized."
-    }), status=401, content_type="application/json")
+    return Response(
+        json.dumps({"code": "unauthorized", "message": "Unauthorized."}),
+        status=401,
+        content_type="application/json",
+    )


 # register blueprint routers
@@ -204,38 +207,36 @@ def register_blueprints(app):
     from controllers.service_api import bp as service_api_bp
     from controllers.web import bp as web_bp

-    CORS(service_api_bp,
-         allow_headers=['Content-Type', 'Authorization', 'X-App-Code'],
-         methods=['GET', 'PUT', 'POST', 'DELETE', 'OPTIONS', 'PATCH']
-         )
+    CORS(
+        service_api_bp,
+        allow_headers=["Content-Type", "Authorization", "X-App-Code"],
+        methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
+    )
     app.register_blueprint(service_api_bp)

-    CORS(web_bp,
-         resources={
-             r"/*": {"origins": app.config['WEB_API_CORS_ALLOW_ORIGINS']}},
-         supports_credentials=True,
-         allow_headers=['Content-Type', 'Authorization', 'X-App-Code'],
-         methods=['GET', 'PUT', 'POST', 'DELETE', 'OPTIONS', 'PATCH'],
-         expose_headers=['X-Version', 'X-Env']
-         )
+    CORS(
+        web_bp,
+        resources={r"/*": {"origins": app.config["WEB_API_CORS_ALLOW_ORIGINS"]}},
+        supports_credentials=True,
+        allow_headers=["Content-Type", "Authorization", "X-App-Code"],
+        methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
+        expose_headers=["X-Version", "X-Env"],
+    )
     app.register_blueprint(web_bp)

-    CORS(console_app_bp,
-         resources={
-             r"/*": {"origins": app.config['CONSOLE_CORS_ALLOW_ORIGINS']}},
-         supports_credentials=True,
-         allow_headers=['Content-Type', 'Authorization'],
-         methods=['GET', 'PUT', 'POST', 'DELETE', 'OPTIONS', 'PATCH'],
-         expose_headers=['X-Version', 'X-Env']
-         )
+    CORS(
+        console_app_bp,
+        resources={r"/*": {"origins": app.config["CONSOLE_CORS_ALLOW_ORIGINS"]}},
+        supports_credentials=True,
+        allow_headers=["Content-Type", "Authorization"],
+        methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
+        expose_headers=["X-Version", "X-Env"],
+    )
     app.register_blueprint(console_app_bp)

-    CORS(files_bp,
-         allow_headers=['Content-Type'],
-         methods=['GET', 'PUT', 'POST', 'DELETE', 'OPTIONS', 'PATCH']
-         )
+    CORS(files_bp, allow_headers=["Content-Type"], methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"])
     app.register_blueprint(files_bp)

     app.register_blueprint(inner_api_bp)
@@ -245,29 +246,29 @@ def register_blueprints(app):
 app = create_app()
 celery = app.extensions["celery"]

-if app.config.get('TESTING'):
+if app.config.get("TESTING"):
     print("App is running in TESTING mode")


 @app.after_request
 def after_request(response):
     """Add Version headers to the response."""
-    response.set_cookie('remember_token', '', expires=0)
-    response.headers.add('X-Version', app.config['CURRENT_VERSION'])
-    response.headers.add('X-Env', app.config['DEPLOY_ENV'])
+    response.set_cookie("remember_token", "", expires=0)
+    response.headers.add("X-Version", app.config["CURRENT_VERSION"])
+    response.headers.add("X-Env", app.config["DEPLOY_ENV"])
     return response


-@app.route('/health')
+@app.route("/health")
 def health():
-    return Response(json.dumps({
-        'pid': os.getpid(),
-        'status': 'ok',
-        'version': app.config['CURRENT_VERSION']
-    }), status=200, content_type="application/json")
+    return Response(
+        json.dumps({"pid": os.getpid(), "status": "ok", "version": app.config["CURRENT_VERSION"]}),
+        status=200,
+        content_type="application/json",
+    )


-@app.route('/threads')
+@app.route("/threads")
 def threads():
     num_threads = threading.active_count()
     threads = threading.enumerate()
@@ -278,32 +279,34 @@ def threads():
         thread_id = thread.ident
         is_alive = thread.is_alive()

-        thread_list.append({
-            'name': thread_name,
-            'id': thread_id,
-            'is_alive': is_alive
-        })
+        thread_list.append(
+            {
+                "name": thread_name,
+                "id": thread_id,
+                "is_alive": is_alive,
+            }
+        )

     return {
-        'pid': os.getpid(),
-        'thread_num': num_threads,
-        'threads': thread_list
+        "pid": os.getpid(),
+        "thread_num": num_threads,
+        "threads": thread_list,
     }


-@app.route('/db-pool-stat')
+@app.route("/db-pool-stat")
 def pool_stat():
     engine = db.engine
     return {
-        'pid': os.getpid(),
-        'pool_size': engine.pool.size(),
-        'checked_in_connections': engine.pool.checkedin(),
-        'checked_out_connections': engine.pool.checkedout(),
-        'overflow_connections': engine.pool.overflow(),
-        'connection_timeout': engine.pool.timeout(),
-        'recycle_time': db.engine.pool._recycle
+        "pid": os.getpid(),
+        "pool_size": engine.pool.size(),
+        "checked_in_connections": engine.pool.checkedin(),
+        "checked_out_connections": engine.pool.checkedout(),
+        "overflow_connections": engine.pool.overflow(),
+        "connection_timeout": engine.pool.timeout(),
+        "recycle_time": db.engine.pool._recycle,
     }


-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=5001)
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=5001)
diff --git a/api/commands.py b/api/commands.py
index 82a32f0f5b8a5..41f1a6444c458 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -27,32 +27,29 @@
 from services.account_service import RegisterService, TenantService


-@click.command('reset-password', help='Reset the account password.')
-@click.option('--email', prompt=True, help='The email address of the account whose password you need to reset')
-@click.option('--new-password', prompt=True, help='the new password.')
-@click.option('--password-confirm', prompt=True, help='the new password confirm.')
+@click.command("reset-password", help="Reset the account password.")
+@click.option("--email", prompt=True, help="The email address of the account whose password you need to reset")
+@click.option("--new-password", prompt=True, help="the new password.")
+@click.option("--password-confirm", prompt=True, help="the new password confirm.")
 def reset_password(email, new_password, password_confirm):
     """
     Reset password of owner account
     Only available in SELF_HOSTED mode
     """
     if str(new_password).strip() != str(password_confirm).strip():
-        click.echo(click.style('sorry. The two passwords do not match.', fg='red'))
+        click.echo(click.style("sorry. The two passwords do not match.", fg="red"))
         return

-    account = db.session.query(Account). \
-        filter(Account.email == email). \
-        one_or_none()
+    account = db.session.query(Account).filter(Account.email == email).one_or_none()

     if not account:
-        click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
+        click.echo(click.style("sorry. the account: [{}] not exist .".format(email), fg="red"))
         return

     try:
         valid_password(new_password)
     except:
-        click.echo(
-            click.style('sorry. The passwords must match {} '.format(password_pattern), fg='red'))
+        click.echo(click.style("sorry. The passwords must match {} ".format(password_pattern), fg="red"))
         return

     # generate password salt
@@ -65,80 +62,87 @@ def reset_password(email, new_password, password_confirm):
     account.password = base64_password_hashed
     account.password_salt = base64_salt
     db.session.commit()
-    click.echo(click.style('Congratulations! Password has been reset.', fg='green'))
+    click.echo(click.style("Congratulations! Password has been reset.", fg="green"))


-@click.command('reset-email', help='Reset the account email.')
-@click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset')
-@click.option('--new-email', prompt=True, help='the new email.')
-@click.option('--email-confirm', prompt=True, help='the new email confirm.')
+@click.command("reset-email", help="Reset the account email.")
+@click.option("--email", prompt=True, help="The old email address of the account whose email you need to reset")
+@click.option("--new-email", prompt=True, help="the new email.")
+@click.option("--email-confirm", prompt=True, help="the new email confirm.")
 def reset_email(email, new_email, email_confirm):
     """
     Replace account email
     :return:
     """
     if str(new_email).strip() != str(email_confirm).strip():
-        click.echo(click.style('Sorry, new email and confirm email do not match.', fg='red'))
+        click.echo(click.style("Sorry, new email and confirm email do not match.", fg="red"))
         return

-    account = db.session.query(Account). \
-        filter(Account.email == email). \
-        one_or_none()
+    account = db.session.query(Account).filter(Account.email == email).one_or_none()

     if not account:
-        click.echo(click.style('sorry. the account: [{}] not exist .'.format(email), fg='red'))
+        click.echo(click.style("sorry. the account: [{}] not exist .".format(email), fg="red"))
         return

     try:
         email_validate(new_email)
     except:
-        click.echo(
-            click.style('sorry. {} is not a valid email. '.format(email), fg='red'))
+        click.echo(click.style("sorry. {} is not a valid email. ".format(email), fg="red"))
         return

     account.email = new_email
     db.session.commit()
-    click.echo(click.style('Congratulations!, email has been reset.', fg='green'))
-
-
-@click.command('reset-encrypt-key-pair', help='Reset the asymmetric key pair of workspace for encrypt LLM credentials. '
-                                              'After the reset, all LLM credentials will become invalid, '
-                                              'requiring re-entry.'
-                                              'Only support SELF_HOSTED mode.')
-@click.confirmation_option(prompt=click.style('Are you sure you want to reset encrypt key pair?'
-                                              ' this operation cannot be rolled back!', fg='red'))
+    click.echo(click.style("Congratulations!, email has been reset.", fg="green"))
+
+
+@click.command(
+    "reset-encrypt-key-pair",
+    help="Reset the asymmetric key pair of workspace for encrypt LLM credentials. "
+    "After the reset, all LLM credentials will become invalid, "
+    "requiring re-entry."
+    "Only support SELF_HOSTED mode.",
+)
+@click.confirmation_option(
+    prompt=click.style(
+        "Are you sure you want to reset encrypt key pair?" " this operation cannot be rolled back!", fg="red"
+    )
+)
 def reset_encrypt_key_pair():
     """
     Reset the encrypted key pair of workspace for encrypt LLM credentials.
     After the reset, all LLM credentials will become invalid, requiring re-entry.
     Only support SELF_HOSTED mode.
     """
-    if dify_config.EDITION != 'SELF_HOSTED':
-        click.echo(click.style('Sorry, only support SELF_HOSTED mode.', fg='red'))
+    if dify_config.EDITION != "SELF_HOSTED":
+        click.echo(click.style("Sorry, only support SELF_HOSTED mode.", fg="red"))
        return

     tenants = db.session.query(Tenant).all()
     for tenant in tenants:
         if not tenant:
-            click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
+            click.echo(click.style("Sorry, no workspace found. Please enter /install to initialize.", fg="red"))
             return

         tenant.encrypt_public_key = generate_key_pair(tenant.id)

-        db.session.query(Provider).filter(Provider.provider_type == 'custom', Provider.tenant_id == tenant.id).delete()
+        db.session.query(Provider).filter(Provider.provider_type == "custom", Provider.tenant_id == tenant.id).delete()
         db.session.query(ProviderModel).filter(ProviderModel.tenant_id == tenant.id).delete()
         db.session.commit()

-        click.echo(click.style('Congratulations! '
-                               'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
+        click.echo(
+            click.style(
+                "Congratulations! " "the asymmetric key pair of workspace {} has been reset.".format(tenant.id),
+                fg="green",
+            )
+        )


-@click.command('vdb-migrate', help='migrate vector db.')
-@click.option('--scope', default='all', prompt=False, help='The scope of vector database to migrate, Default is All.')
+@click.command("vdb-migrate", help="migrate vector db.")
+@click.option("--scope", default="all", prompt=False, help="The scope of vector database to migrate, Default is All.")
 def vdb_migrate(scope: str):
-    if scope in ['knowledge', 'all']:
+    if scope in ["knowledge", "all"]:
         migrate_knowledge_vector_database()
-    if scope in ['annotation', 'all']:
+    if scope in ["annotation", "all"]:
         migrate_annotation_vector_database()
@@ -146,7 +150,7 @@ def migrate_annotation_vector_database():
     """
     Migrate annotation datas to target vector database .
     """
-    click.echo(click.style('Start migrate annotation data.', fg='green'))
+    click.echo(click.style("Start migrate annotation data.", fg="green"))
     create_count = 0
     skipped_count = 0
     total_count = 0
@@ -154,98 +158,103 @@ def migrate_annotation_vector_database():
     while True:
         try:
             # get apps info
-            apps = db.session.query(App).filter(
-                App.status == 'normal'
-            ).order_by(App.created_at.desc()).paginate(page=page, per_page=50)
+            apps = (
+                db.session.query(App)
+                .filter(App.status == "normal")
+                .order_by(App.created_at.desc())
+                .paginate(page=page, per_page=50)
+            )
         except NotFound:
             break

         page += 1
         for app in apps:
             total_count = total_count + 1
-            click.echo(f'Processing the {total_count} app {app.id}. '
-                       + f'{create_count} created, {skipped_count} skipped.')
+            click.echo(
+                f"Processing the {total_count} app {app.id}. " + f"{create_count} created, {skipped_count} skipped."
+            )
             try:
-                click.echo('Create app annotation index: {}'.format(app.id))
-                app_annotation_setting = db.session.query(AppAnnotationSetting).filter(
-                    AppAnnotationSetting.app_id == app.id
-                ).first()
+                click.echo("Create app annotation index: {}".format(app.id))
+                app_annotation_setting = (
+                    db.session.query(AppAnnotationSetting).filter(AppAnnotationSetting.app_id == app.id).first()
+                )

                 if not app_annotation_setting:
                     skipped_count = skipped_count + 1
-                    click.echo('App annotation setting is disabled: {}'.format(app.id))
+                    click.echo("App annotation setting is disabled: {}".format(app.id))
                     continue

                 # get dataset_collection_binding info
-                dataset_collection_binding = db.session.query(DatasetCollectionBinding).filter(
-                    DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id
-                ).first()
+                dataset_collection_binding = (
+                    db.session.query(DatasetCollectionBinding)
+                    .filter(DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id)
+                    .first()
+                )
                 if not dataset_collection_binding:
-                    click.echo('App annotation collection binding is not exist: {}'.format(app.id))
+                    click.echo("App annotation collection binding is not exist: {}".format(app.id))
                     continue
                 annotations = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app.id).all()
                 dataset = Dataset(
                     id=app.id,
                     tenant_id=app.tenant_id,
-                    indexing_technique='high_quality',
+                    indexing_technique="high_quality",
                     embedding_model_provider=dataset_collection_binding.provider_name,
                     embedding_model=dataset_collection_binding.model_name,
-                    collection_binding_id=dataset_collection_binding.id
+                    collection_binding_id=dataset_collection_binding.id,
                 )
                 documents = []
                 if annotations:
                     for annotation in annotations:
                         document = Document(
                             page_content=annotation.question,
-                            metadata={
-                                "annotation_id": annotation.id,
-                                "app_id": app.id,
-                                "doc_id": annotation.id
-                            }
+                            metadata={"annotation_id": annotation.id, "app_id": app.id, "doc_id": annotation.id},
                         )
                         documents.append(document)

-                vector = Vector(dataset, attributes=['doc_id', 'annotation_id', 'app_id'])
+                vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
                 click.echo(f"Start to migrate annotation, app_id: {app.id}.")

                 try:
                     vector.delete()
-                    click.echo(
-                        click.style(f'Successfully delete vector index for app: {app.id}.',
-                                    fg='green'))
+                    click.echo(click.style(f"Successfully delete vector index for app: {app.id}.", fg="green"))
                 except Exception as e:
-                    click.echo(
-                        click.style(f'Failed to delete vector index for app {app.id}.',
-                                    fg='red'))
+                    click.echo(click.style(f"Failed to delete vector index for app {app.id}.", fg="red"))
                     raise e

                 if documents:
                     try:
-                        click.echo(click.style(
-                            f'Start to created vector index with {len(documents)} annotations for app {app.id}.',
-                            fg='green'))
-                        vector.create(documents)
                         click.echo(
-                            click.style(f'Successfully created vector index for app {app.id}.', fg='green'))
+                            click.style(
+                                f"Start to created vector index with {len(documents)} annotations for app {app.id}.",
+                                fg="green",
+                            )
+                        )
+                        vector.create(documents)
+                        click.echo(click.style(f"Successfully created vector index for app {app.id}.", fg="green"))
                     except Exception as e:
-                        click.echo(click.style(f'Failed to created vector index for app {app.id}.', fg='red'))
+                        click.echo(click.style(f"Failed to created vector index for app {app.id}.", fg="red"))
                         raise e
-                click.echo(f'Successfully migrated app annotation {app.id}.')
+                click.echo(f"Successfully migrated app annotation {app.id}.")
                 create_count += 1
             except Exception as e:
                 click.echo(
-                    click.style('Create app annotation index error: {} {}'.format(e.__class__.__name__, str(e)),
-                                fg='red'))
+                    click.style(
+                        "Create app annotation index error: {} {}".format(e.__class__.__name__, str(e)), fg="red"
+                    )
+                )
                 continue

     click.echo(
-        click.style(f'Congratulations! Create {create_count} app annotation indexes, and skipped {skipped_count} apps.',
-                    fg='green'))
+        click.style(
+            f"Congratulations! Create {create_count} app annotation indexes, and skipped {skipped_count} apps.",
+            fg="green",
+        )
+    )


 def migrate_knowledge_vector_database():
     """
     Migrate vector database datas to target vector database .
     """
-    click.echo(click.style('Start migrate vector db.', fg='green'))
+    click.echo(click.style("Start migrate vector db.", fg="green"))
     create_count = 0
     skipped_count = 0
     total_count = 0
@@ -253,87 +262,77 @@ def migrate_knowledge_vector_database():
     page = 1
     while True:
         try:
-            datasets = db.session.query(Dataset).filter(Dataset.indexing_technique == 'high_quality') \
-                .order_by(Dataset.created_at.desc()).paginate(page=page, per_page=50)
+            datasets = (
+                db.session.query(Dataset)
+                .filter(Dataset.indexing_technique == "high_quality")
+                .order_by(Dataset.created_at.desc())
+                .paginate(page=page, per_page=50)
+            )
         except NotFound:
             break

         page += 1
         for dataset in datasets:
             total_count = total_count + 1
-            click.echo(f'Processing the {total_count} dataset {dataset.id}. '
-                       + f'{create_count} created, {skipped_count} skipped.')
+            click.echo(
+                f"Processing the {total_count} dataset {dataset.id}. "
+                + f"{create_count} created, {skipped_count} skipped."
+            )
             try:
-                click.echo('Create dataset vdb index: {}'.format(dataset.id))
+                click.echo("Create dataset vdb index: {}".format(dataset.id))
                 if dataset.index_struct_dict:
-                    if dataset.index_struct_dict['type'] == vector_type:
+                    if dataset.index_struct_dict["type"] == vector_type:
                         skipped_count = skipped_count + 1
                         continue
-                collection_name = ''
+                collection_name = ""
                 if vector_type == VectorType.WEAVIATE:
                     dataset_id = dataset.id
                     collection_name = Dataset.gen_collection_name_by_id(dataset_id)
-                    index_struct_dict = {
-                        "type": VectorType.WEAVIATE,
-                        "vector_store": {"class_prefix": collection_name}
-                    }
+                    index_struct_dict = {"type": VectorType.WEAVIATE, "vector_store": {"class_prefix": collection_name}}
                     dataset.index_struct = json.dumps(index_struct_dict)
                 elif vector_type == VectorType.QDRANT:
                     if dataset.collection_binding_id:
-                        dataset_collection_binding = db.session.query(DatasetCollectionBinding). \
-                            filter(DatasetCollectionBinding.id == dataset.collection_binding_id). \
-                            one_or_none()
+                        dataset_collection_binding = (
+                            db.session.query(DatasetCollectionBinding)
+                            .filter(DatasetCollectionBinding.id == dataset.collection_binding_id)
+                            .one_or_none()
+                        )
                         if dataset_collection_binding:
                             collection_name = dataset_collection_binding.collection_name
                         else:
-                            raise ValueError('Dataset Collection Bindings is not exist!')
+                            raise ValueError("Dataset Collection Bindings is not exist!")
                     else:
                         dataset_id = dataset.id
                         collection_name = Dataset.gen_collection_name_by_id(dataset_id)
-                    index_struct_dict = {
-                        "type": VectorType.QDRANT,
-                        "vector_store": {"class_prefix": collection_name}
-                    }
+                    index_struct_dict = {"type": VectorType.QDRANT, "vector_store": {"class_prefix": collection_name}}
                     dataset.index_struct = json.dumps(index_struct_dict)
                 elif vector_type == VectorType.MILVUS:
                     dataset_id = dataset.id
                     collection_name = Dataset.gen_collection_name_by_id(dataset_id)
-                    index_struct_dict = {
-                        "type": VectorType.MILVUS,
-                        "vector_store": {"class_prefix": collection_name}
-                    }
+                    index_struct_dict = {"type": VectorType.MILVUS, "vector_store": {"class_prefix": collection_name}}
                     dataset.index_struct = json.dumps(index_struct_dict)
                 elif vector_type == VectorType.RELYT:
                     dataset_id = dataset.id
                     collection_name = Dataset.gen_collection_name_by_id(dataset_id)
-                    index_struct_dict = {
-                        "type": 'relyt',
-                        "vector_store": {"class_prefix": collection_name}
-                    }
+                    index_struct_dict = {"type": "relyt", "vector_store": {"class_prefix": collection_name}}
                     dataset.index_struct = json.dumps(index_struct_dict)
                 elif vector_type == VectorType.TENCENT:
                     dataset_id = dataset.id
                     collection_name = Dataset.gen_collection_name_by_id(dataset_id)
-                    index_struct_dict = {
-                        "type": VectorType.TENCENT,
-                        "vector_store": {"class_prefix": collection_name}
-                    }
+                    index_struct_dict = {"type": VectorType.TENCENT, "vector_store": {"class_prefix": collection_name}}
                     dataset.index_struct = json.dumps(index_struct_dict)
                 elif vector_type == VectorType.PGVECTOR:
                     dataset_id = dataset.id
                     collection_name = Dataset.gen_collection_name_by_id(dataset_id)
-                    index_struct_dict = {
-                        "type": VectorType.PGVECTOR,
-                        "vector_store": {"class_prefix": collection_name}
-                    }
+                    index_struct_dict = {"type": VectorType.PGVECTOR, "vector_store": {"class_prefix": collection_name}}
                     dataset.index_struct = json.dumps(index_struct_dict)
                 elif vector_type == VectorType.OPENSEARCH:
                     dataset_id = dataset.id
                     collection_name = Dataset.gen_collection_name_by_id(dataset_id)
                     index_struct_dict = {
                         "type": VectorType.OPENSEARCH,
-                        "vector_store": {"class_prefix": collection_name}
+                        "vector_store": {"class_prefix": collection_name},
                     }
                     dataset.index_struct = json.dumps(index_struct_dict)
                 elif vector_type == VectorType.ANALYTICDB:
@@ -341,16 +340,13 @@ def migrate_knowledge_vector_database():
                     collection_name = Dataset.gen_collection_name_by_id(dataset_id)
                     index_struct_dict = {
                         "type": VectorType.ANALYTICDB,
-                        "vector_store": {"class_prefix": collection_name}
+                        "vector_store": {"class_prefix": collection_name},
                     }
                     dataset.index_struct = json.dumps(index_struct_dict)
                 elif vector_type == VectorType.ELASTICSEARCH:
                     dataset_id = dataset.id
                     index_name = Dataset.gen_collection_name_by_id(dataset_id)
-                    index_struct_dict = {
-                        "type": 'elasticsearch',
-                        "vector_store": {"class_prefix": index_name}
-                    }
+                    index_struct_dict = {"type": "elasticsearch", "vector_store": {"class_prefix": index_name}}
                     dataset.index_struct = json.dumps(index_struct_dict)
                 else:
                     raise ValueError(f"Vector store {vector_type} is not supported.")
@@ -361,29 +357,41 @@ def migrate_knowledge_vector_database():
                 try:
                     vector.delete()
                     click.echo(
-                        click.style(f'Successfully delete vector index {collection_name} for dataset {dataset.id}.',
-                                    fg='green'))
+                        click.style(
+                            f"Successfully delete vector index {collection_name} for dataset {dataset.id}.", fg="green"
+                        )
+                    )
                 except Exception as e:
                     click.echo(
-                        click.style(f'Failed to delete vector index {collection_name} for dataset {dataset.id}.',
-                                    fg='red'))
+                        click.style(
+                            f"Failed to delete vector index {collection_name} for dataset {dataset.id}.", fg="red"
+                        )
+                    )
                     raise e

-                dataset_documents = db.session.query(DatasetDocument).filter(
-                    DatasetDocument.dataset_id == dataset.id,
-                    DatasetDocument.indexing_status == 'completed',
-                    DatasetDocument.enabled == True,
-                    DatasetDocument.archived == False,
-                ).all()
+                dataset_documents = (
+                    db.session.query(DatasetDocument)
+                    .filter(
+                        DatasetDocument.dataset_id == dataset.id,
+                        DatasetDocument.indexing_status == "completed",
+                        DatasetDocument.enabled == True,
+                        DatasetDocument.archived == False,
+                    )
+                    .all()
+                )

                 documents = []
                 segments_count = 0
                 for dataset_document in dataset_documents:
-                    segments = db.session.query(DocumentSegment).filter(
-                        DocumentSegment.document_id == dataset_document.id,
-                        DocumentSegment.status == 'completed',
-                        DocumentSegment.enabled == True
-                    ).all()
+                    segments = (
+                        db.session.query(DocumentSegment)
+                        .filter(
+                            DocumentSegment.document_id == dataset_document.id,
+                            DocumentSegment.status == "completed",
+                            DocumentSegment.enabled == True,
+                        )
+                        .all()
+                    )

                     for segment in segments:
                         document = Document(
@@ -393,7 +401,7 @@ def migrate_knowledge_vector_database():
                                 "doc_hash": segment.index_node_hash,
                                 "document_id": segment.document_id,
                                 "dataset_id": segment.dataset_id,
-                            }
+                            },
                         )

                         documents.append(document)
@@ -401,37 +409,43 @@ def migrate_knowledge_vector_database():

                 if documents:
                     try:
-                        click.echo(click.style(
-                            f'Start to created vector index with {len(documents)} documents of {segments_count} segments for dataset {dataset.id}.',
-                            fg='green'))
+                        click.echo(
+                            click.style(
+                                f"Start to created vector index with {len(documents)} documents of {segments_count} segments for dataset {dataset.id}.",
+                                fg="green",
+                            )
+                        )
                         vector.create(documents)
                         click.echo(
-                            click.style(f'Successfully created vector index for dataset {dataset.id}.', fg='green'))
+                            click.style(f"Successfully created vector index for dataset {dataset.id}.", fg="green")
+                        )
                     except Exception as e:
-                        click.echo(click.style(f'Failed to created vector index for dataset {dataset.id}.', fg='red'))
+                        click.echo(click.style(f"Failed to created vector index for dataset {dataset.id}.", fg="red"))
                         raise e
                 db.session.add(dataset)
                 db.session.commit()
-                click.echo(f'Successfully migrated dataset {dataset.id}.')
+                click.echo(f"Successfully migrated dataset {dataset.id}.")
                 create_count += 1
             except Exception as e:
                 db.session.rollback()
                 click.echo(
-                    click.style('Create dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
-                                fg='red'))
+                    click.style("Create dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red")
+                )
                 continue

     click.echo(
-        click.style(f'Congratulations! Create {create_count} dataset indexes, and skipped {skipped_count} datasets.',
-                    fg='green'))
+        click.style(
+            f"Congratulations! Create {create_count} dataset indexes, and skipped {skipped_count} datasets.", fg="green"
+        )
+    )


-@click.command('convert-to-agent-apps', help='Convert Agent Assistant to Agent App.')
+@click.command("convert-to-agent-apps", help="Convert Agent Assistant to Agent App.")
 def convert_to_agent_apps():
     """
     Convert Agent Assistant to Agent App.
     """
-    click.echo(click.style('Start convert to agent apps.', fg='green'))
+    click.echo(click.style("Start convert to agent apps.", fg="green"))

     proceeded_app_ids = []
@@ -466,7 +480,7 @@ def convert_to_agent_apps():
             break

         for app in apps:
-            click.echo('Converting app: {}'.format(app.id))
+            click.echo("Converting app: {}".format(app.id))

             try:
                 app.mode = AppMode.AGENT_CHAT.value
@@ -478,137 +492,139 @@ def convert_to_agent_apps():
                 )

                 db.session.commit()
-                click.echo(click.style('Converted app: {}'.format(app.id), fg='green'))
+                click.echo(click.style("Converted app: {}".format(app.id), fg="green"))
             except Exception as e:
-                click.echo(
-                    click.style('Convert app error: {} {}'.format(e.__class__.__name__,
-                                                                  str(e)), fg='red'))
+                click.echo(click.style("Convert app error: {} {}".format(e.__class__.__name__, str(e)), fg="red"))

-    click.echo(click.style('Congratulations! Converted {} agent apps.'.format(len(proceeded_app_ids)), fg='green'))
+    click.echo(click.style("Congratulations! Converted {} agent apps.".format(len(proceeded_app_ids)), fg="green"))


-@click.command('add-qdrant-doc-id-index', help='add qdrant doc_id index.')
-@click.option('--field', default='metadata.doc_id', prompt=False, help='index field , default is metadata.doc_id.')
+@click.command("add-qdrant-doc-id-index", help="add qdrant doc_id index.")
+@click.option("--field", default="metadata.doc_id", prompt=False, help="index field , default is metadata.doc_id.")
 def add_qdrant_doc_id_index(field: str):
-    click.echo(click.style('Start add qdrant doc_id index.', fg='green'))
+    click.echo(click.style("Start add qdrant doc_id index.", fg="green"))
     vector_type = dify_config.VECTOR_STORE
     if vector_type != "qdrant":
-        click.echo(click.style('Sorry, only support qdrant vector store.', fg='red'))
+        click.echo(click.style("Sorry, only support qdrant vector store.", fg="red"))
         return
     create_count = 0
     try:
         bindings = db.session.query(DatasetCollectionBinding).all()
         if not bindings:
-            click.echo(click.style('Sorry, no dataset collection bindings found.', fg='red'))
+            click.echo(click.style("Sorry, no dataset collection bindings found.", fg="red"))
             return
         import qdrant_client
         from qdrant_client.http.exceptions import UnexpectedResponse
         from qdrant_client.http.models import PayloadSchemaType

         from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantConfig
+
         for binding in bindings:
             if dify_config.QDRANT_URL is None:
-                raise ValueError('Qdrant url is required.')
+                raise ValueError("Qdrant url is required.")
             qdrant_config = QdrantConfig(
                 endpoint=dify_config.QDRANT_URL,
                 api_key=dify_config.QDRANT_API_KEY,
                 root_path=current_app.root_path,
                 timeout=dify_config.QDRANT_CLIENT_TIMEOUT,
                 grpc_port=dify_config.QDRANT_GRPC_PORT,
-                prefer_grpc=dify_config.QDRANT_GRPC_ENABLED
+                prefer_grpc=dify_config.QDRANT_GRPC_ENABLED,
             )
             try:
                 client = qdrant_client.QdrantClient(**qdrant_config.to_qdrant_params())
                 # create payload index
-                client.create_payload_index(binding.collection_name, field,
-                                            field_schema=PayloadSchemaType.KEYWORD)
+                client.create_payload_index(binding.collection_name, field, field_schema=PayloadSchemaType.KEYWORD)
                 create_count += 1
             except UnexpectedResponse as e:
                 # Collection does not exist, so return
                 if e.status_code == 404:
-                    click.echo(click.style(f'Collection not found, collection_name:{binding.collection_name}.', fg='red'))
+                    click.echo(
+                        click.style(f"Collection not found, collection_name:{binding.collection_name}.", fg="red")
+                    )
                     continue
                 # Some other error occurred, so re-raise the exception
                 else:
-                    click.echo(click.style(f'Failed to create qdrant index, collection_name:{binding.collection_name}.', fg='red'))
+                    click.echo(
+                        click.style(
+                            f"Failed to create qdrant index, collection_name:{binding.collection_name}.", fg="red"
+                        )
+                    )
     except Exception as e:
-        click.echo(click.style('Failed to create qdrant client.', fg='red'))
+        click.echo(click.style("Failed to create qdrant client.", fg="red"))

-    click.echo(
-        click.style(f'Congratulations! Create {create_count} collection indexes.',
-                    fg='green'))
+    click.echo(click.style(f"Congratulations! Create {create_count} collection indexes.", fg="green"))


-@click.command('create-tenant', help='Create account and tenant.')
-@click.option('--email', prompt=True, help='The email address of the tenant account.')
-@click.option('--language', prompt=True, help='Account language, default: en-US.')
+@click.command("create-tenant", help="Create account and tenant.")
+@click.option("--email", prompt=True, help="The email address of the tenant account.")
+@click.option("--language", prompt=True, help="Account language, default: en-US.")
 def create_tenant(email: str, language: Optional[str] = None):
     """
     Create tenant account
     """
     if not email:
-        click.echo(click.style('Sorry, email is required.', fg='red'))
+        click.echo(click.style("Sorry, email is required.", fg="red"))
         return

     # Create account
     email = email.strip()

-    if '@' not in email:
-        click.echo(click.style('Sorry, invalid email address.', fg='red'))
+    if "@" not in email:
+        click.echo(click.style("Sorry, invalid email address.", fg="red"))
         return

-    account_name = email.split('@')[0]
+    account_name = email.split("@")[0]

     if language not in languages:
-        language = 'en-US'
+        language = "en-US"

     # generate random password
     new_password = secrets.token_urlsafe(16)

     # register account
-    account = RegisterService.register(
-        email=email,
-        name=account_name,
-        password=new_password,
-        language=language
-    )
+    account = RegisterService.register(email=email, name=account_name, password=new_password, language=language)

     TenantService.create_owner_tenant_if_not_exist(account)

-    click.echo(click.style('Congratulations! Account and tenant created.\n'
-                           'Account: {}\nPassword: {}'.format(email, new_password), fg='green'))
+    click.echo(
+        click.style(
+            "Congratulations! Account and tenant created.\n" "Account: {}\nPassword: {}".format(email, new_password),
+            fg="green",
+        )
+    )


-@click.command('upgrade-db', help='upgrade the database')
+@click.command("upgrade-db", help="upgrade the database")
 def upgrade_db():
-    click.echo('Preparing database migration...')
-    lock = redis_client.lock(name='db_upgrade_lock', timeout=60)
+    click.echo("Preparing database migration...")
+    lock = redis_client.lock(name="db_upgrade_lock", timeout=60)
     if lock.acquire(blocking=False):
         try:
-            click.echo(click.style('Start database migration.', fg='green'))
+            click.echo(click.style("Start database migration.", fg="green"))

             # run db migration
             import flask_migrate
+
             flask_migrate.upgrade()

-            click.echo(click.style('Database migration successful!', fg='green'))
+            click.echo(click.style("Database migration successful!", fg="green"))
         except Exception as e:
-            logging.exception(f'Database migration failed, error: {e}')
+            logging.exception(f"Database migration failed, error: {e}")
         finally:
             lock.release()
     else:
-        click.echo('Database migration skipped')
+        click.echo("Database migration skipped")


-@click.command('fix-app-site-missing', help='Fix app related site missing issue.')
+@click.command("fix-app-site-missing", help="Fix app related site missing issue.")
 def fix_app_site_missing():
     """
     Fix app related site missing issue.
     """
-    click.echo(click.style('Start fix app related site missing issue.', fg='green'))
+    click.echo(click.style("Start fix app related site missing issue.", fg="green"))

     failed_app_ids = []
     while True:
@@ -639,15 +655,14 @@ def fix_app_site_missing():
                 app_was_created.send(app, account=account)
             except Exception as e:
                 failed_app_ids.append(app_id)
-                click.echo(click.style('Fix app {} related site missing issue failed!'.format(app_id), fg='red'))
-                logging.exception(f'Fix app related site missing issue failed, error: {e}')
+                click.echo(click.style("Fix app {} related site missing issue failed!".format(app_id), fg="red"))
+                logging.exception(f"Fix app related site missing issue failed, error: {e}")
                 continue

         if not processed_count:
             break

-
-    click.echo(click.style('Congratulations! Fix app related site missing issue successful!', fg='green'))
+    click.echo(click.style("Congratulations! Fix app related site missing issue successful!", fg="green"))


 def register_commands(app):
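All of the commands above follow one shape: a `@click.command` name, `@click.option` inputs, and colored `click.echo` feedback, later attached to the Flask CLI by `register_commands`. A hedged sketch of that wiring (the body of `register_commands` sits outside the hunks shown; `app.cli.add_command` is the usual Flask mechanism, and the command here is illustrative):

    import click
    from flask import Flask

    @click.command("say-hello", help="Example command in the style used above.")
    @click.option("--name", prompt=True, help="Who to greet.")
    def say_hello(name: str):
        click.echo(click.style(f"Hello, {name}!", fg="green"))

    def register_commands(app: Flask):
        # Each registered command becomes `flask say-hello` on the CLI.
        app.cli.add_command(say_hello)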
diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py
index 369b25d788a44..ce59a281bcb4c 100644
--- a/api/configs/feature/__init__.py
+++ b/api/configs/feature/__init__.py
@@ -406,6 +406,7 @@ class DataSetConfig(BaseSettings):
         default=False,
     )

+
 class WorkspaceConfig(BaseSettings):
     """
     Workspace configs
@@ -442,6 +443,63 @@ class CeleryBeatConfig(BaseSettings):
     )


+class PositionConfig(BaseSettings):
+
+    POSITION_PROVIDER_PINS: str = Field(
+        description='The heads of model providers',
+        default='',
+    )
+
+    POSITION_PROVIDER_INCLUDES: str = Field(
+        description='The included model providers',
+        default='',
+    )
+
+    POSITION_PROVIDER_EXCLUDES: str = Field(
+        description='The excluded model providers',
+        default='',
+    )
+
+    POSITION_TOOL_PINS: str = Field(
+        description='The heads of tools',
+        default='',
+    )
+
+    POSITION_TOOL_INCLUDES: str = Field(
+        description='The included tools',
+        default='',
+    )
+
+    POSITION_TOOL_EXCLUDES: str = Field(
+        description='The excluded tools',
+        default='',
+    )
+
+    @computed_field
+    def POSITION_PROVIDER_PINS_LIST(self) -> list[str]:
+        return [item.strip() for item in self.POSITION_PROVIDER_PINS.split(',') if item.strip() != '']
+
+    @computed_field
+    def POSITION_PROVIDER_INCLUDES_LIST(self) -> list[str]:
+        return [item.strip() for item in self.POSITION_PROVIDER_INCLUDES.split(',') if item.strip() != '']
+
+    @computed_field
+    def POSITION_PROVIDER_EXCLUDES_LIST(self) -> list[str]:
+        return [item.strip() for item in self.POSITION_PROVIDER_EXCLUDES.split(',') if item.strip() != '']
+
+    @computed_field
+    def POSITION_TOOL_PINS_LIST(self) -> list[str]:
+        return [item.strip() for item in self.POSITION_TOOL_PINS.split(',') if item.strip() != '']
+
+    @computed_field
+    def POSITION_TOOL_INCLUDES_LIST(self) -> list[str]:
+        return [item.strip() for item in self.POSITION_TOOL_INCLUDES.split(',') if item.strip() != '']
+
+    @computed_field
+    def POSITION_TOOL_EXCLUDES_LIST(self) -> list[str]:
+        return [item.strip() for item in self.POSITION_TOOL_EXCLUDES.split(',') if item.strip() != '']
+
+
 class FeatureConfig(
     # place the configs in alphabet order
     AppExecutionConfig,
@@ -466,6 +524,7 @@ class FeatureConfig(
     UpdateConfig,
     WorkflowConfig,
     WorkspaceConfig,
+    PositionConfig,

     # hosted services config
     HostedServiceConfig,
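Each `*_LIST` computed field above derives a clean list from its comma-separated env string (the new POSITION_* entries in api/.env.example). A standalone sketch of the same parsing with a single field; `PositionSketch` is an illustrative model, not the full FeatureConfig:

    from pydantic import Field, computed_field
    from pydantic_settings import BaseSettings

    class PositionSketch(BaseSettings):
        POSITION_TOOL_PINS: str = Field(description="The heads of tools", default="")

        @computed_field
        def POSITION_TOOL_PINS_LIST(self) -> list[str]:
            # Split on commas, trim whitespace, drop empty entries.
            return [item.strip() for item in self.POSITION_TOOL_PINS.split(",") if item.strip() != ""]

    print(PositionSketch(POSITION_TOOL_PINS="google, bing, ").POSITION_TOOL_PINS_LIST)  # ['google', 'bing']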
"de-DE": "Europe/Berlin", + "ja-JP": "Asia/Tokyo", + "ko-KR": "Asia/Seoul", + "ru-RU": "Europe/Moscow", + "it-IT": "Europe/Rome", + "uk-UA": "Europe/Kyiv", + "vi-VN": "Asia/Ho_Chi_Minh", + "ro-RO": "Europe/Bucharest", + "pl-PL": "Europe/Warsaw", + "hi-IN": "Asia/Kolkata", + "tr-TR": "Europe/Istanbul", + "fa-IR": "Asia/Tehran", } languages = list(language_timezone_mapping.keys()) @@ -26,6 +26,5 @@ def supported_language(lang): if lang in languages: return lang - error = ('{lang} is not a valid language.' - .format(lang=lang)) + error = "{lang} is not a valid language.".format(lang=lang) raise ValueError(error) diff --git a/api/constants/model_template.py b/api/constants/model_template.py index cc5a37025479f..7e1a196356c4e 100644 --- a/api/constants/model_template.py +++ b/api/constants/model_template.py @@ -5,82 +5,79 @@ default_app_templates = { # workflow default mode AppMode.WORKFLOW: { - 'app': { - 'mode': AppMode.WORKFLOW.value, - 'enable_site': True, - 'enable_api': True + "app": { + "mode": AppMode.WORKFLOW.value, + "enable_site": True, + "enable_api": True, } }, - # completion default mode AppMode.COMPLETION: { - 'app': { - 'mode': AppMode.COMPLETION.value, - 'enable_site': True, - 'enable_api': True + "app": { + "mode": AppMode.COMPLETION.value, + "enable_site": True, + "enable_api": True, }, - 'model_config': { - 'model': { + "model_config": { + "model": { "provider": "openai", "name": "gpt-4o", "mode": "chat", - "completion_params": {} + "completion_params": {}, }, - 'user_input_form': json.dumps([ - { - "paragraph": { - "label": "Query", - "variable": "query", - "required": True, - "default": "" - } - } - ]), - 'pre_prompt': '{{query}}' + "user_input_form": json.dumps( + [ + { + "paragraph": { + "label": "Query", + "variable": "query", + "required": True, + "default": "", + }, + }, + ] + ), + "pre_prompt": "{{query}}", }, - }, - # chat default mode AppMode.CHAT: { - 'app': { - 'mode': AppMode.CHAT.value, - 'enable_site': True, - 'enable_api': True + "app": { + "mode": AppMode.CHAT.value, + "enable_site": True, + "enable_api": True, }, - 'model_config': { - 'model': { + "model_config": { + "model": { "provider": "openai", "name": "gpt-4o", "mode": "chat", - "completion_params": {} - } - } + "completion_params": {}, + }, + }, }, - # advanced-chat default mode AppMode.ADVANCED_CHAT: { - 'app': { - 'mode': AppMode.ADVANCED_CHAT.value, - 'enable_site': True, - 'enable_api': True - } + "app": { + "mode": AppMode.ADVANCED_CHAT.value, + "enable_site": True, + "enable_api": True, + }, }, - # agent-chat default mode AppMode.AGENT_CHAT: { - 'app': { - 'mode': AppMode.AGENT_CHAT.value, - 'enable_site': True, - 'enable_api': True + "app": { + "mode": AppMode.AGENT_CHAT.value, + "enable_site": True, + "enable_api": True, }, - 'model_config': { - 'model': { + "model_config": { + "model": { "provider": "openai", "name": "gpt-4o", "mode": "chat", - "completion_params": {} - } - } - } + "completion_params": {}, + }, + }, + }, } diff --git a/api/contexts/__init__.py b/api/contexts/__init__.py index 306fac3a93129..623a1a28eb731 100644 --- a/api/contexts/__init__.py +++ b/api/contexts/__init__.py @@ -1,3 +1,7 @@ from contextvars import ContextVar -tenant_id: ContextVar[str] = ContextVar('tenant_id') \ No newline at end of file +from core.workflow.entities.variable_pool import VariablePool + +tenant_id: ContextVar[str] = ContextVar("tenant_id") + +workflow_variable_pool: ContextVar[VariablePool] = ContextVar("workflow_variable_pool") diff --git 
diff --git a/api/core/app/app_config/easy_ui_based_app/dataset/manager.py b/api/core/app/app_config/easy_ui_based_app/dataset/manager.py
index ec17db5f06a30..f4e6675bd4443 100644
--- a/api/core/app/app_config/easy_ui_based_app/dataset/manager.py
+++ b/api/core/app/app_config/easy_ui_based_app/dataset/manager.py
@@ -93,6 +93,7 @@ def convert(cls, config: dict) -> Optional[DatasetEntity]:
                     reranking_model=dataset_configs.get('reranking_model'),
                     weights=dataset_configs.get('weights'),
                     reranking_enabled=dataset_configs.get('reranking_enabled', True),
+                    rerank_mode=dataset_configs["reranking_mode"],
                 )
             )
diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py
index 0cde659992636..351eb05d8ad41 100644
--- a/api/core/app/apps/advanced_chat/app_generator.py
+++ b/api/core/app/apps/advanced_chat/app_generator.py
@@ -8,6 +8,8 @@
 from flask import Flask, current_app
 from pydantic import ValidationError
+from sqlalchemy import select
+from sqlalchemy.orm import Session

 import contexts
 from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
@@ -18,15 +20,20 @@
 from core.app.apps.base_app_queue_manager import AppQueueManager, GenerateTaskStoppedException, PublishFrom
 from core.app.apps.message_based_app_generator import MessageBasedAppGenerator
 from core.app.apps.message_based_app_queue_manager import MessageBasedAppQueueManager
-from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom
+from core.app.entities.app_invoke_entities import (
+    AdvancedChatAppGenerateEntity,
+    InvokeFrom,
+)
 from core.app.entities.task_entities import ChatbotAppBlockingResponse, ChatbotAppStreamResponse
 from core.file.message_file_parser import MessageFileParser
 from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
 from core.ops.ops_trace_manager import TraceQueueManager
+from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.enums import SystemVariable
 from extensions.ext_database import db
 from models.account import Account
 from models.model import App, Conversation, EndUser, Message
-from models.workflow import Workflow
+from models.workflow import ConversationVariable, Workflow

 logger = logging.getLogger(__name__)
@@ -120,7 +127,7 @@ def generate(
             conversation=conversation,
             stream=stream
         )
-    
+
     def single_iteration_generate(self, app_model: App,
                                   workflow: Workflow,
                                   node_id: str,
@@ -140,10 +147,10 @@ def single_iteration_generate(self, app_model: App,
         """
         if not node_id:
             raise ValueError('node_id is required')
-    
+
         if args.get('inputs') is None:
             raise ValueError('inputs is required')
-    
+
         extras = {
             "auto_generate_conversation_name": False
         }
@@ -209,7 +216,7 @@ def _generate(self, *,
         # update conversation features
         conversation.override_model_configs = workflow.features
         db.session.commit()
-        db.session.refresh(conversation)
+        # db.session.refresh(conversation)

         # init queue manager
         queue_manager = MessageBasedAppQueueManager(
@@ -221,15 +228,69 @@ def _generate(self, *,
             message_id=message.id
         )

+        # Init conversation variables
+        stmt = select(ConversationVariable).where(
+            ConversationVariable.app_id == conversation.app_id, ConversationVariable.conversation_id == conversation.id
+        )
+        with Session(db.engine) as session:
+            conversation_variables = session.scalars(stmt).all()
+            if not conversation_variables:
+                # Create conversation variables if they don't exist.
+                conversation_variables = [
+                    ConversationVariable.from_variable(
+                        app_id=conversation.app_id, conversation_id=conversation.id, variable=variable
+                    )
+                    for variable in workflow.conversation_variables
+                ]
+                session.add_all(conversation_variables)
+            # Convert database entities to variables.
+            conversation_variables = [item.to_variable() for item in conversation_variables]
+
+            session.commit()
+
+        # Increment dialogue count.
+        conversation.dialogue_count += 1
+
+        conversation_id = conversation.id
+        conversation_dialogue_count = conversation.dialogue_count
+        db.session.commit()
+        db.session.refresh(conversation)
+
+        inputs = application_generate_entity.inputs
+        query = application_generate_entity.query
+        files = application_generate_entity.files
+
+        user_id = None
+        if application_generate_entity.invoke_from in [InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API]:
+            end_user = db.session.query(EndUser).filter(EndUser.id == application_generate_entity.user_id).first()
+            if end_user:
+                user_id = end_user.session_id
+        else:
+            user_id = application_generate_entity.user_id
+
+        # Create a variable pool.
+        system_inputs = {
+            SystemVariable.QUERY: query,
+            SystemVariable.FILES: files,
+            SystemVariable.CONVERSATION_ID: conversation_id,
+            SystemVariable.USER_ID: user_id,
+            SystemVariable.DIALOGUE_COUNT: conversation_dialogue_count,
+        }
+        variable_pool = VariablePool(
+            system_variables=system_inputs,
+            user_inputs=inputs,
+            environment_variables=workflow.environment_variables,
+            conversation_variables=conversation_variables,
+        )
+        contexts.workflow_variable_pool.set(variable_pool)
+
         # new thread
         worker_thread = threading.Thread(target=self._generate_worker, kwargs={
             'flask_app': current_app._get_current_object(),
             'application_generate_entity': application_generate_entity,
             'queue_manager': queue_manager,
-            'conversation_id': conversation.id,
             'message_id': message.id,
-            'user': user,
-            'context': contextvars.copy_context()
+            'context': contextvars.copy_context(),
         })

         worker_thread.start()
@@ -242,7 +303,7 @@ def _generate(self, *,
             conversation=conversation,
             message=message,
             user=user,
-            stream=stream
+            stream=stream,
         )

         return AdvancedChatAppGenerateResponseConverter.convert(
@@ -253,9 +314,7 @@ def _generate(self, *,
     def _generate_worker(self, flask_app: Flask,
                          application_generate_entity: AdvancedChatAppGenerateEntity,
                          queue_manager: AppQueueManager,
-                         conversation_id: str,
                          message_id: str,
-                         user: Account,
                          context: contextvars.Context) -> None:
         """
         Generate worker in a new thread.
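The `'context': contextvars.copy_context()` kwarg above is what lets ContextVars set in `_generate` (including the variable pool) survive the hop into the worker thread, since plain threads start with an empty context. A minimal sketch of that handoff under simplified names; the replay loop is an assumption about how the worker re-enters the captured context:

    import contextvars
    import threading

    pool: contextvars.ContextVar[dict] = contextvars.ContextVar("workflow_variable_pool")

    def _generate_worker(context: contextvars.Context) -> None:
        # Replay the captured context into this thread before touching the pool.
        for var, value in context.items():
            var.set(value)
        print(pool.get())  # {'query': 'hi'}

    pool.set({"query": "hi"})
    thread = threading.Thread(target=_generate_worker, kwargs={"context": contextvars.copy_context()})
    thread.start()
    thread.join()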
@@ -282,8 +341,7 @@ def _generate_worker(self, flask_app: Flask,
                     user_id=application_generate_entity.user_id
                 )
             else:
-                # get conversation and message
-                conversation = self._get_conversation(conversation_id)
+                # get message
                 message = self._get_message(message_id)

                 # chatbot app
@@ -291,7 +349,6 @@ def _generate_worker(self, flask_app: Flask,
                 runner.run(
                     application_generate_entity=application_generate_entity,
                     queue_manager=queue_manager,
-                    conversation=conversation,
                     message=message
                 )
         except GenerateTaskStoppedException:
@@ -314,14 +371,17 @@ def _generate_worker(self, flask_app: Flask,
         finally:
             db.session.close()

-    def _handle_advanced_chat_response(self, application_generate_entity: AdvancedChatAppGenerateEntity,
-                                       workflow: Workflow,
-                                       queue_manager: AppQueueManager,
-                                       conversation: Conversation,
-                                       message: Message,
-                                       user: Union[Account, EndUser],
-                                       stream: bool = False) \
-            -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]:
+    def _handle_advanced_chat_response(
+        self,
+        *,
+        application_generate_entity: AdvancedChatAppGenerateEntity,
+        workflow: Workflow,
+        queue_manager: AppQueueManager,
+        conversation: Conversation,
+        message: Message,
+        user: Union[Account, EndUser],
+        stream: bool = False,
+    ) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]:
         """
         Handle response.
         :param application_generate_entity: application generate entity
@@ -341,7 +401,7 @@ def _handle_advanced_chat_response(self, application_generate_entity: AdvancedCh
             conversation=conversation,
             message=message,
             user=user,
-            stream=stream
+            stream=stream,
         )

         try:
diff --git a/api/core/app/apps/advanced_chat/app_runner.py b/api/core/app/apps/advanced_chat/app_runner.py
index 47c53531f6aa1..5dc03979cf3b4 100644
--- a/api/core/app/apps/advanced_chat/app_runner.py
+++ b/api/core/app/apps/advanced_chat/app_runner.py
@@ -4,9 +4,6 @@
 from collections.abc import Mapping
 from typing import Any, Optional, cast

-from sqlalchemy import select
-from sqlalchemy.orm import Session
-
 from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfig
 from core.app.apps.advanced_chat.workflow_event_trigger_callback import WorkflowEventTriggerCallback
 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
@@ -19,13 +16,10 @@
 from core.app.entities.queue_entities import QueueAnnotationReplyEvent, QueueStopEvent, QueueTextChunkEvent
 from core.moderation.base import ModerationException
 from core.workflow.callbacks.base_workflow_callback import WorkflowCallback
-from core.workflow.entities.node_entities import SystemVariable
-from core.workflow.entities.variable_pool import VariablePool
 from core.workflow.nodes.base_node import UserFrom
 from core.workflow.workflow_engine_manager import WorkflowEngineManager
 from extensions.ext_database import db
-from models.model import App, Conversation, EndUser, Message
-from models.workflow import ConversationVariable, Workflow
+from models import App, Message, Workflow

 logger = logging.getLogger(__name__)
@@ -39,7 +33,6 @@ def run(
         self,
         application_generate_entity: AdvancedChatAppGenerateEntity,
         queue_manager: AppQueueManager,
-        conversation: Conversation,
         message: Message,
     ) -> None:
         """
@@ -63,15 +56,6 @@ def run(
         inputs = application_generate_entity.inputs
         query = application_generate_entity.query
-        files = application_generate_entity.files
-
-        user_id = None
-        if application_generate_entity.invoke_from in [InvokeFrom.WEB_APP, InvokeFrom.SERVICE_API]:
-            end_user = db.session.query(EndUser).filter(EndUser.id == application_generate_entity.user_id).first()
-            if end_user:
-                user_id = end_user.session_id
-        else:
-            user_id = application_generate_entity.user_id

         # moderation
         if self.handle_input_moderation(
@@ -103,38 +87,6 @@ def run(
         if bool(os.environ.get('DEBUG', 'False').lower() == 'true'):
             workflow_callbacks.append(WorkflowLoggingCallback())

-        # Init conversation variables
-        stmt = select(ConversationVariable).where(
-            ConversationVariable.app_id == conversation.app_id, ConversationVariable.conversation_id == conversation.id
-        )
-        with Session(db.engine) as session:
-            conversation_variables = session.scalars(stmt).all()
-            if not conversation_variables:
-                conversation_variables = [
-                    ConversationVariable.from_variable(
-                        app_id=conversation.app_id, conversation_id=conversation.id, variable=variable
-                    )
-                    for variable in workflow.conversation_variables
-                ]
-                session.add_all(conversation_variables)
-            session.commit()
-            # Convert database entities to variables
-            conversation_variables = [item.to_variable() for item in conversation_variables]
-
-        # Create a variable pool.
-        system_inputs = {
-            SystemVariable.QUERY: query,
-            SystemVariable.FILES: files,
-            SystemVariable.CONVERSATION_ID: conversation.id,
-            SystemVariable.USER_ID: user_id,
-        }
-        variable_pool = VariablePool(
-            system_variables=system_inputs,
-            user_inputs=inputs,
-            environment_variables=workflow.environment_variables,
-            conversation_variables=conversation_variables,
-        )
-
         # RUN WORKFLOW
         workflow_engine_manager = WorkflowEngineManager()
         workflow_engine_manager.run_workflow(
@@ -146,7 +98,6 @@ def run(
             invoke_from=application_generate_entity.invoke_from,
             callbacks=workflow_callbacks,
             call_depth=application_generate_entity.call_depth,
-            variable_pool=variable_pool,
         )

     def single_iteration_run(
@@ -155,7 +106,7 @@ def single_iteration_run(
         """
         Single iteration run
         """
-        app_record: App = db.session.query(App).filter(App.id == app_id).first()
+        app_record = db.session.query(App).filter(App.id == app_id).first()
         if not app_record:
             raise ValueError('App not found')
diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py
index 91a43ed449302..f8efcb59606d0 100644
--- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py
+++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py
@@ -4,6 +4,7 @@
 from collections.abc import Generator
 from typing import Any, Optional, Union, cast

+import contexts
 from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME
 from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk
 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
@@ -47,7 +48,8 @@
 from core.model_runtime.entities.llm_entities import LLMUsage
 from core.model_runtime.utils.encoders import jsonable_encoder
 from core.ops.ops_trace_manager import TraceQueueManager
-from core.workflow.entities.node_entities import NodeType, SystemVariable
+from core.workflow.entities.node_entities import NodeType
+from core.workflow.enums import SystemVariable
 from core.workflow.nodes.answer.answer_node import AnswerNode
 from core.workflow.nodes.answer.entities import TextGenerateRouteChunk, VarGenerateRouteChunk
 from events.message_event import message_was_created
@@ -71,6 +73,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
     _application_generate_entity: AdvancedChatAppGenerateEntity
     _workflow: Workflow
     _user: Union[Account, EndUser]
+    # Deprecated
     _workflow_system_variables: dict[SystemVariable, Any]
     _iteration_nested_relations: dict[str, list[str]]

@@ -81,7 +84,7 @@ def __init__(
         conversation: Conversation,
         message: Message,
         user: Union[Account, EndUser],
-        stream: bool
+        stream: bool,
     ) -> None:
         """
         Initialize AdvancedChatAppGenerateTaskPipeline.
@@ -103,11 +106,12 @@ def __init__(
         self._workflow = workflow
         self._conversation = conversation
         self._message = message
+        # Deprecated
         self._workflow_system_variables = {
             SystemVariable.QUERY: message.query,
             SystemVariable.FILES: application_generate_entity.files,
             SystemVariable.CONVERSATION_ID: conversation.id,
-            SystemVariable.USER_ID: user_id
+            SystemVariable.USER_ID: user_id,
         }

         self._task_state = AdvancedChatTaskState(
@@ -613,7 +617,9 @@ def _generate_stream_outputs_when_node_finished(self) -> Optional[Generator]:

                 if route_chunk_node_id == 'sys':
                     # system variable
-                    value = self._workflow_system_variables.get(SystemVariable.value_of(value_selector[1]))
+                    value = contexts.workflow_variable_pool.get().get(value_selector)
+                    if value:
+                        value = value.text
                 elif route_chunk_node_id in self._iteration_nested_relations:
                     # it's a iteration variable
                     if not self._iteration_state or route_chunk_node_id not in self._iteration_state.current_iterations:
diff --git a/api/core/app/apps/base_app_runner.py b/api/core/app/apps/base_app_runner.py
index 58c7d04b8348f..6fb387c15ac56 100644
--- a/api/core/app/apps/base_app_runner.py
+++ b/api/core/app/apps/base_app_runner.py
@@ -1,6 +1,6 @@
 import time
 from collections.abc import Generator
-from typing import Optional, Union
+from typing import TYPE_CHECKING, Optional, Union

 from core.app.app_config.entities import ExternalDataVariableEntity, PromptTemplateEntity
 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
@@ -14,7 +14,6 @@
 from core.app.features.annotation_reply.annotation_reply import AnnotationReplyFeature
 from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
 from core.external_data_tool.external_data_fetch import ExternalDataFetch
-from core.file.file_obj import FileVar
 from core.memory.token_buffer_memory import TokenBufferMemory
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
@@ -27,13 +26,16 @@
 from core.prompt.simple_prompt_transform import ModelMode, SimplePromptTransform
 from models.model import App, AppMode, Message, MessageAnnotation

+if TYPE_CHECKING:
+    from core.file.file_obj import FileVar
+

 class AppRunner:
     def get_pre_calculate_rest_tokens(self, app_record: App,
                                       model_config: ModelConfigWithCredentialsEntity,
                                       prompt_template_entity: PromptTemplateEntity,
                                       inputs: dict[str, str],
-                                      files: list[FileVar],
+                                      files: list["FileVar"],
                                       query: Optional[str] = None) -> int:
         """
         Get pre calculate rest tokens
@@ -126,7 +128,7 @@ def organize_prompt_messages(self, app_record: App,
                                  model_config: ModelConfigWithCredentialsEntity,
                                  prompt_template_entity: PromptTemplateEntity,
                                  inputs: dict[str, str],
-                                 files: list[FileVar],
+                                 files: list["FileVar"],
                                  query: Optional[str] = None,
                                  context: Optional[str] = None,
                                  memory: Optional[TokenBufferMemory] = None) \
@@ -366,7 +368,7 @@ def moderation_for_inputs(
             message_id=message_id,
             trace_manager=app_generate_entity.trace_manager
         )
-    
+
     def check_hosting_moderation(self, application_generate_entity: EasyUIBasedAppGenerateEntity,
                                  queue_manager: AppQueueManager,
                                  prompt_messages: list[PromptMessage]) -> bool:
@@ -418,7 +420,7 @@ def fill_in_inputs_from_external_data_tools(self, tenant_id: str,
             inputs=inputs,
             query=query
         )
-    
+
     def query_app_annotations_to_reply(self, app_record: App,
                                        message: Message,
                                        query: str,
diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py
index c5cd6864020b3..12f69f1528e24 100644
--- a/api/core/app/apps/message_based_app_generator.py
+++ b/api/core/app/apps/message_based_app_generator.py
@@ -258,7 +258,7 @@ def _get_conversation_introduction(self, application_generate_entity: AppGenerat

         return introduction

-    def _get_conversation(self, conversation_id: str) -> Conversation:
+    def _get_conversation(self, conversation_id: str):
         """
         Get conversation by conversation id
         :param conversation_id: conversation id
@@ -270,6 +270,9 @@ def _get_conversation(self, conversation_id: str):
             .first()
         )

+        if not conversation:
+            raise ConversationNotExistsError()
+
         return conversation

     def _get_message(self, message_id: str) -> Message:
diff --git a/api/core/app/apps/workflow/app_runner.py b/api/core/app/apps/workflow/app_runner.py
index 17a99cf1c5fd6..994919391e7ed 100644
--- a/api/core/app/apps/workflow/app_runner.py
+++ b/api/core/app/apps/workflow/app_runner.py
@@ -11,8 +11,8 @@
     WorkflowAppGenerateEntity,
 )
 from core.workflow.callbacks.base_workflow_callback import WorkflowCallback
-from core.workflow.entities.node_entities import SystemVariable
 from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.enums import SystemVariable
 from core.workflow.nodes.base_node import UserFrom
 from core.workflow.workflow_engine_manager import WorkflowEngineManager
 from extensions.ext_database import db
diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py
index 2b4362150fc7e..5022eb0438d13 100644
--- a/api/core/app/apps/workflow/generate_task_pipeline.py
+++ b/api/core/app/apps/workflow/generate_task_pipeline.py
@@ -42,7 +42,8 @@
 from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline
 from core.app.task_pipeline.workflow_cycle_manage import WorkflowCycleManage
 from core.ops.ops_trace_manager import TraceQueueManager
-from core.workflow.entities.node_entities import NodeType, SystemVariable
+from core.workflow.entities.node_entities import NodeType
+from core.workflow.enums import SystemVariable
 from core.workflow.nodes.end.end_node import EndNode
 from extensions.ext_database import db
 from models.account import Account
@@ -519,7 +520,7 @@ def _get_iteration_nested_relations(self, graph: dict) -> dict[str, list[str]]:
         """
         nodes = graph.get('nodes')

-        iteration_ids = [node.get('id') for node in nodes 
+        iteration_ids = [node.get('id') for node in nodes
                          if node.get('data', {}).get('type') in [
                              NodeType.ITERATION.value,
                              NodeType.LOOP.value,
@@ -530,4 +531,3 @@ def _get_iteration_nested_relations(self, graph: dict) -> dict[str, list[str]]:
                 node.get('id') for node in nodes if node.get('data', {}).get('iteration_id') == iteration_id
             ] for iteration_id in iteration_ids
         }
-        
\ No newline at end of file
diff --git a/api/core/app/entities/app_invoke_entities.py b/api/core/app/entities/app_invoke_entities.py
index 9a861c29e2634..6a1ab230416d0 100644
--- a/api/core/app/entities/app_invoke_entities.py
+++ b/api/core/app/entities/app_invoke_entities.py
@@ -166,4 +166,4 @@ class SingleIterationRunEntity(BaseModel):

         node_id: str
         inputs: dict

-    single_iteration_run:
Optional[SingleIterationRunEntity] = None \ No newline at end of file + single_iteration_run: Optional[SingleIterationRunEntity] = None diff --git a/api/core/app/segments/__init__.py b/api/core/app/segments/__init__.py index 174e241261fe8..7de06dfb9639f 100644 --- a/api/core/app/segments/__init__.py +++ b/api/core/app/segments/__init__.py @@ -2,7 +2,6 @@ from .segments import ( ArrayAnySegment, ArraySegment, - FileSegment, FloatSegment, IntegerSegment, NoneSegment, @@ -13,11 +12,9 @@ from .types import SegmentType from .variables import ( ArrayAnyVariable, - ArrayFileVariable, ArrayNumberVariable, ArrayObjectVariable, ArrayStringVariable, - FileVariable, FloatVariable, IntegerVariable, NoneVariable, @@ -32,7 +29,6 @@ 'FloatVariable', 'ObjectVariable', 'SecretVariable', - 'FileVariable', 'StringVariable', 'ArrayAnyVariable', 'Variable', @@ -45,11 +41,9 @@ 'FloatSegment', 'ObjectSegment', 'ArrayAnySegment', - 'FileSegment', 'StringSegment', 'ArrayStringVariable', 'ArrayNumberVariable', 'ArrayObjectVariable', - 'ArrayFileVariable', 'ArraySegment', ] diff --git a/api/core/app/segments/factory.py b/api/core/app/segments/factory.py index 91ff1fdb3de6e..e6e9ce97747ce 100644 --- a/api/core/app/segments/factory.py +++ b/api/core/app/segments/factory.py @@ -2,12 +2,10 @@ from typing import Any from configs import dify_config -from core.file.file_obj import FileVar from .exc import VariableError from .segments import ( ArrayAnySegment, - FileSegment, FloatSegment, IntegerSegment, NoneSegment, @@ -17,11 +15,9 @@ ) from .types import SegmentType from .variables import ( - ArrayFileVariable, ArrayNumberVariable, ArrayObjectVariable, ArrayStringVariable, - FileVariable, FloatVariable, IntegerVariable, ObjectVariable, @@ -49,8 +45,6 @@ def build_variable_from_mapping(mapping: Mapping[str, Any], /) -> Variable: result = FloatVariable.model_validate(mapping) case SegmentType.NUMBER if not isinstance(value, float | int): raise VariableError(f'invalid number value {value}') - case SegmentType.FILE: - result = FileVariable.model_validate(mapping) case SegmentType.OBJECT if isinstance(value, dict): result = ObjectVariable.model_validate(mapping) case SegmentType.ARRAY_STRING if isinstance(value, list): @@ -59,10 +53,6 @@ def build_variable_from_mapping(mapping: Mapping[str, Any], /) -> Variable: result = ArrayNumberVariable.model_validate(mapping) case SegmentType.ARRAY_OBJECT if isinstance(value, list): result = ArrayObjectVariable.model_validate(mapping) - case SegmentType.ARRAY_FILE if isinstance(value, list): - mapping = dict(mapping) - mapping['value'] = [{'value': v} for v in value] - result = ArrayFileVariable.model_validate(mapping) case _: raise VariableError(f'not supported value type {value_type}') if result.size > dify_config.MAX_VARIABLE_SIZE: @@ -83,6 +73,4 @@ def build_segment(value: Any, /) -> Segment: return ObjectSegment(value=value) if isinstance(value, list): return ArrayAnySegment(value=value) - if isinstance(value, FileVar): - return FileSegment(value=value) raise ValueError(f'not supported value {value}') diff --git a/api/core/app/segments/segments.py b/api/core/app/segments/segments.py index 7653e1085f881..5c713cac6747f 100644 --- a/api/core/app/segments/segments.py +++ b/api/core/app/segments/segments.py @@ -5,8 +5,6 @@ from pydantic import BaseModel, ConfigDict, field_validator -from core.file.file_obj import FileVar - from .types import SegmentType @@ -78,14 +76,7 @@ class IntegerSegment(Segment): value: int -class FileSegment(Segment): - value_type: SegmentType = SegmentType.FILE - # 
TODO: embed FileVar in this model. - value: FileVar - @property - def markdown(self) -> str: - return self.value.to_markdown() class ObjectSegment(Segment): @@ -108,7 +99,13 @@ def markdown(self) -> str: class ArraySegment(Segment): @property def markdown(self) -> str: - return '\n'.join(['- ' + item.markdown for item in self.value]) + items = [] + for item in self.value: + if hasattr(item, 'to_markdown'): + items.append(item.to_markdown()) + else: + items.append(str(item)) + return '\n'.join(items) class ArrayAnySegment(ArraySegment): @@ -130,7 +127,3 @@ class ArrayObjectSegment(ArraySegment): value_type: SegmentType = SegmentType.ARRAY_OBJECT value: Sequence[Mapping[str, Any]] - -class ArrayFileSegment(ArraySegment): - value_type: SegmentType = SegmentType.ARRAY_FILE - value: Sequence[FileSegment] diff --git a/api/core/app/segments/types.py b/api/core/app/segments/types.py index a371058ef52ba..cdd2b0b4b0919 100644 --- a/api/core/app/segments/types.py +++ b/api/core/app/segments/types.py @@ -10,8 +10,6 @@ class SegmentType(str, Enum): ARRAY_STRING = 'array[string]' ARRAY_NUMBER = 'array[number]' ARRAY_OBJECT = 'array[object]' - ARRAY_FILE = 'array[file]' OBJECT = 'object' - FILE = 'file' GROUP = 'group' diff --git a/api/core/app/segments/variables.py b/api/core/app/segments/variables.py index ac26e165425c3..8fef707fcf298 100644 --- a/api/core/app/segments/variables.py +++ b/api/core/app/segments/variables.py @@ -4,11 +4,9 @@ from .segments import ( ArrayAnySegment, - ArrayFileSegment, ArrayNumberSegment, ArrayObjectSegment, ArrayStringSegment, - FileSegment, FloatSegment, IntegerSegment, NoneSegment, @@ -44,10 +42,6 @@ class IntegerVariable(IntegerSegment, Variable): pass -class FileVariable(FileSegment, Variable): - pass - - class ObjectVariable(ObjectSegment, Variable): pass @@ -68,9 +62,6 @@ class ArrayObjectVariable(ArrayObjectSegment, Variable): pass -class ArrayFileVariable(ArrayFileSegment, Variable): - pass - class SecretVariable(StringVariable): value_type: SegmentType = SegmentType.SECRET diff --git a/api/core/app/task_pipeline/workflow_cycle_state_manager.py b/api/core/app/task_pipeline/workflow_cycle_state_manager.py index 545f31fddfaed..8baa8ba09e4b0 100644 --- a/api/core/app/task_pipeline/workflow_cycle_state_manager.py +++ b/api/core/app/task_pipeline/workflow_cycle_state_manager.py @@ -2,7 +2,7 @@ from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity from core.app.entities.task_entities import AdvancedChatTaskState, WorkflowTaskState -from core.workflow.entities.node_entities import SystemVariable +from core.workflow.enums import SystemVariable from models.account import Account from models.model import EndUser from models.workflow import Workflow @@ -13,4 +13,4 @@ class WorkflowCycleStateManager: _workflow: Workflow _user: Union[Account, EndUser] _task_state: Union[AdvancedChatTaskState, WorkflowTaskState] - _workflow_system_variables: dict[SystemVariable, Any] \ No newline at end of file + _workflow_system_variables: dict[SystemVariable, Any] diff --git a/api/core/file/message_file_parser.py b/api/core/file/message_file_parser.py index 01b89907db2a6..085ff07cfde92 100644 --- a/api/core/file/message_file_parser.py +++ b/api/core/file/message_file_parser.py @@ -99,7 +99,7 @@ def validate_and_transform_files_arg(self, files: Sequence[Mapping[str, Any]], f # return all file objs return new_files - def transform_message_files(self, files: list[MessageFile], file_extra_config: FileExtraConfig) -> list[FileVar]: + def 
transform_message_files(self, files: list[MessageFile], file_extra_config: FileExtraConfig): """ transform message files @@ -144,7 +144,7 @@ def _to_file_objs(self, files: list[Union[dict, MessageFile]], return type_file_objs - def _to_file_obj(self, file: Union[dict, MessageFile], file_extra_config: FileExtraConfig) -> FileVar: + def _to_file_obj(self, file: Union[dict, MessageFile], file_extra_config: FileExtraConfig): """ transform file to file obj diff --git a/api/core/helper/position_helper.py b/api/core/helper/position_helper.py index dd1534c791b31..93e3a87124a88 100644 --- a/api/core/helper/position_helper.py +++ b/api/core/helper/position_helper.py @@ -3,12 +3,13 @@ from collections.abc import Callable from typing import Any +from configs import dify_config from core.tools.utils.yaml_utils import load_yaml_file def get_position_map(folder_path: str, *, file_name: str = "_position.yaml") -> dict[str, int]: """ - Get the mapping from name to index from a YAML file + Get the mapping from name to index from a YAML file. :param folder_path: :param file_name: the YAML file name, default to '_position.yaml' :return: a dict with name as key and index as value @@ -19,6 +20,64 @@ def get_position_map(folder_path: str, *, file_name: str = "_position.yaml") -> return {name: index for index, name in enumerate(positions)} +def get_tool_position_map(folder_path: str, file_name: str = "_position.yaml") -> dict[str, int]: + """ + Get the mapping for tools from name to index from a YAML file. + :param folder_path: + :param file_name: the YAML file name, default to '_position.yaml' + :return: a dict with name as key and index as value + """ + position_map = get_position_map(folder_path, file_name=file_name) + + return sort_and_filter_position_map( + position_map, + pin_list=dify_config.POSITION_TOOL_PINS_LIST, + include_list=dify_config.POSITION_TOOL_INCLUDES_LIST, + exclude_list=dify_config.POSITION_TOOL_EXCLUDES_LIST + ) + + +def get_provider_position_map(folder_path: str, file_name: str = "_position.yaml") -> dict[str, int]: + """ + Get the mapping for providers from name to index from a YAML file. 
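Note: `get_tool_position_map` and `get_provider_position_map` both delegate to `sort_and_filter_position_map`, defined just below. The ordering rule is: pinned names first, in pin-list order, then the remaining names in their `_position.yaml` order, filtered by the include/exclude lists. A minimal standalone sketch of that behavior (the provider names and the `sort_and_filter` helper are illustrative, not repo code):

```python
def sort_and_filter(
    original: dict[str, int],
    pin_list: list[str],
    include_list: list[str],
    exclude_list: list[str],
) -> dict[str, int]:
    positions = sorted(original, key=original.get)
    include = set(include_list) if include_list else set(positions)
    exclude = set(exclude_list) if exclude_list else set()

    # Pins come first, in pin-list order, but only if they actually exist.
    result = {name: idx for idx, name in enumerate(pin_list) if name in original}

    # Everything else keeps its original relative order, minus exclusions.
    next_idx = len(result)
    for name in positions:
        if name in include and name not in exclude and name not in result:
            result[name] = next_idx
            next_idx += 1
    return result


original = {"google": 0, "bing": 1, "duckduckgo": 2, "wikipedia": 3}
# Pin wikipedia to the front and hide bing entirely:
print(sort_and_filter(original, ["wikipedia"], [], ["bing"]))
# -> {'wikipedia': 0, 'google': 1, 'duckduckgo': 2}
```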
+    :param folder_path:
+    :param file_name: the YAML file name, default to '_position.yaml'
+    :return: a dict with name as key and index as value
+    """
+    position_map = get_position_map(folder_path, file_name=file_name)
+    return sort_and_filter_position_map(
+        position_map,
+        pin_list=dify_config.POSITION_PROVIDER_PINS_LIST,
+        include_list=dify_config.POSITION_PROVIDER_INCLUDES_LIST,
+        exclude_list=dify_config.POSITION_PROVIDER_EXCLUDES_LIST
+    )
+
+
+def sort_and_filter_position_map(original_position_map: dict[str, int], pin_list: list[str], include_list: list[str], exclude_list: list[str]) -> dict[str, int]:
+    """
+    Sort and filter the positions
+    :param original_position_map: the position map to be sorted and filtered
+    :param pin_list: the list of pins to be put at the beginning
+    :param include_list: the list of names to be included
+    :param exclude_list: the list of names to be excluded
+    :return: the sorted and filtered position map
+    """
+    positions = sorted(original_position_map.keys(), key=lambda x: original_position_map[x])
+    include_set = set(include_list) if include_list else set(positions)
+    exclude_set = set(exclude_list) if exclude_list else set()
+
+    # Add pins to position map
+    position_map = {name: idx for idx, name in enumerate(pin_list) if name in original_position_map}
+
+    # Add remaining positions to position map, respecting include and exclude lists
+    start_idx = len(position_map)
+    for name in positions:
+        if name in include_set and name not in exclude_set and name not in position_map:
+            position_map[name] = start_idx
+            start_idx += 1
+    return position_map
+
+
 def sort_by_position_map(
     position_map: dict[str, int],
     data: list[Any],
@@ -35,7 +94,9 @@
     if not position_map or not data:
         return data
 
-    return sorted(data, key=lambda x: position_map.get(name_func(x), float('inf')))
+    filtered_data = [item for item in data if name_func(item) in position_map]
+
+    return sorted(filtered_data, key=lambda x: position_map.get(name_func(x), float('inf')))
 
 
 def sort_to_dict_by_position_map(
diff --git a/api/core/model_runtime/entities/defaults.py b/api/core/model_runtime/entities/defaults.py
index 87fe4f681ce5c..d2076bf74a3cd 100644
--- a/api/core/model_runtime/entities/defaults.py
+++ b/api/core/model_runtime/entities/defaults.py
@@ -1,4 +1,3 @@
-
 from core.model_runtime.entities.model_entities import DefaultParameterName
 
 PARAMETER_RULE_TEMPLATE: dict[DefaultParameterName, dict] = {
@@ -94,5 +93,16 @@
         },
         'required': False,
         'options': ['JSON', 'XML'],
-    }
-}
\ No newline at end of file
+    },
+    DefaultParameterName.JSON_SCHEMA: {
+        'label': {
+            'en_US': 'JSON Schema',
+        },
+        'type': 'text',
+        'help': {
+            'en_US': 'Setting a response JSON schema will ensure the LLM adheres to it.',
+            'zh_Hans': '设置返回的json schema,llm将按照它返回',
+        },
+        'required': False,
+    },
+}
diff --git a/api/core/model_runtime/entities/model_entities.py b/api/core/model_runtime/entities/model_entities.py
index 3d471787bbef8..c257ce63d2792 100644
--- a/api/core/model_runtime/entities/model_entities.py
+++ b/api/core/model_runtime/entities/model_entities.py
@@ -95,6 +95,7 @@ class DefaultParameterName(Enum):
     FREQUENCY_PENALTY = "frequency_penalty"
     MAX_TOKENS = "max_tokens"
     RESPONSE_FORMAT = "response_format"
+    JSON_SCHEMA = "json_schema"
 
     @classmethod
     def value_of(cls, value: Any) -> 'DefaultParameterName':
@@ -118,6 +119,7 @@ class ParameterType(Enum):
     INT = "int"
     STRING = "string"
     BOOLEAN = "boolean"
+    TEXT = "text"
 
 
 class ModelPropertyKey(Enum):
diff --git a/api/core/model_runtime/model_providers/__base/ai_model.py
b/api/core/model_runtime/model_providers/__base/ai_model.py index 0de216bf896fc..716bb63566c37 100644 --- a/api/core/model_runtime/model_providers/__base/ai_model.py +++ b/api/core/model_runtime/model_providers/__base/ai_model.py @@ -151,9 +151,9 @@ def predefined_models(self) -> list[AIModelEntity]: os.path.join(provider_model_type_path, model_schema_yaml) for model_schema_yaml in os.listdir(provider_model_type_path) if not model_schema_yaml.startswith('__') - and not model_schema_yaml.startswith('_') - and os.path.isfile(os.path.join(provider_model_type_path, model_schema_yaml)) - and model_schema_yaml.endswith('.yaml') + and not model_schema_yaml.startswith('_') + and os.path.isfile(os.path.join(provider_model_type_path, model_schema_yaml)) + and model_schema_yaml.endswith('.yaml') ] # get _position.yaml file path diff --git a/api/core/model_runtime/model_providers/model_provider_factory.py b/api/core/model_runtime/model_providers/model_provider_factory.py index b1660afafb12e..e2d17e3257592 100644 --- a/api/core/model_runtime/model_providers/model_provider_factory.py +++ b/api/core/model_runtime/model_providers/model_provider_factory.py @@ -6,7 +6,7 @@ from pydantic import BaseModel, ConfigDict from core.helper.module_import_helper import load_single_subclass_from_source -from core.helper.position_helper import get_position_map, sort_to_dict_by_position_map +from core.helper.position_helper import get_provider_position_map, sort_to_dict_by_position_map from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.entities.provider_entities import ProviderConfig, ProviderEntity, SimpleProviderEntity from core.model_runtime.model_providers.__base.model_provider import ModelProvider @@ -234,7 +234,7 @@ def _get_model_provider_map(self) -> dict[str, ModelProviderExtension]: ] # get _position.yaml file path - position_map = get_position_map(model_providers_path) + position_map = get_provider_position_map(model_providers_path) # traverse all model_provider_dir_paths model_providers: list[ModelProviderExtension] = [] diff --git a/api/core/model_runtime/model_providers/openai/llm/_position.yaml b/api/core/model_runtime/model_providers/openai/llm/_position.yaml index 21661b9a2b8ae..ac7313aaa1bf0 100644 --- a/api/core/model_runtime/model_providers/openai/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/openai/llm/_position.yaml @@ -2,6 +2,7 @@ - gpt-4o - gpt-4o-2024-05-13 - gpt-4o-2024-08-06 +- chatgpt-4o-latest - gpt-4o-mini - gpt-4o-mini-2024-07-18 - gpt-4-turbo diff --git a/api/core/model_runtime/model_providers/openai/llm/chatgpt-4o-latest.yaml b/api/core/model_runtime/model_providers/openai/llm/chatgpt-4o-latest.yaml new file mode 100644 index 0000000000000..98e236650c9e7 --- /dev/null +++ b/api/core/model_runtime/model_providers/openai/llm/chatgpt-4o-latest.yaml @@ -0,0 +1,44 @@ +model: chatgpt-4o-latest +label: + zh_Hans: chatgpt-4o-latest + en_US: chatgpt-4o-latest +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call + - vision +model_properties: + mode: chat + context_size: 128000 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + - name: max_tokens + use_template: max_tokens + default: 512 + min: 1 + max: 16384 + - name: response_format + label: + zh_Hans: 回复格式 + en_US: response_format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + 
en_US: specifying the format that the model must output
+  required: false
+  options:
+    - text
+    - json_object
+pricing:
+  input: '2.50'
+  output: '10.00'
+  unit: '0.000001'
+  currency: USD
diff --git a/api/core/model_runtime/model_providers/openai/llm/gpt-4o-2024-08-06.yaml b/api/core/model_runtime/model_providers/openai/llm/gpt-4o-2024-08-06.yaml
index cf2de0f73a0b8..7e430c51a710f 100644
--- a/api/core/model_runtime/model_providers/openai/llm/gpt-4o-2024-08-06.yaml
+++ b/api/core/model_runtime/model_providers/openai/llm/gpt-4o-2024-08-06.yaml
@@ -37,6 +37,9 @@ parameter_rules:
     options:
       - text
       - json_object
+      - json_schema
+  - name: json_schema
+    use_template: json_schema
 pricing:
   input: '2.50'
   output: '10.00'
diff --git a/api/core/model_runtime/model_providers/openai/llm/gpt-4o-mini.yaml b/api/core/model_runtime/model_providers/openai/llm/gpt-4o-mini.yaml
index b97fbf8aabcae..23dcf85085e12 100644
--- a/api/core/model_runtime/model_providers/openai/llm/gpt-4o-mini.yaml
+++ b/api/core/model_runtime/model_providers/openai/llm/gpt-4o-mini.yaml
@@ -37,6 +37,9 @@ parameter_rules:
     options:
       - text
       - json_object
+      - json_schema
+  - name: json_schema
+    use_template: json_schema
 pricing:
   input: '0.15'
   output: '0.60'
diff --git a/api/core/model_runtime/model_providers/openai/llm/llm.py b/api/core/model_runtime/model_providers/openai/llm/llm.py
index aae2729bdfb04..06135c958463e 100644
--- a/api/core/model_runtime/model_providers/openai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/openai/llm/llm.py
@@ -1,3 +1,4 @@
+import json
 import logging
 from collections.abc import Generator
 from typing import Optional, Union, cast
@@ -544,13 +545,18 @@ def _chat_generate(self, model: str, credentials: dict,
         response_format = model_parameters.get("response_format")
         if response_format:
-            if response_format == "json_object":
-                response_format = {"type": "json_object"}
+            if response_format == "json_schema":
+                json_schema = model_parameters.get("json_schema")
+                if not json_schema:
+                    raise ValueError("Must define JSON Schema when the response format is json_schema")
+                try:
+                    schema = json.loads(json_schema)
+                except json.JSONDecodeError:
+                    raise ValueError(f"invalid json_schema format: {json_schema}")
+                model_parameters.pop("json_schema")
+                model_parameters["response_format"] = {"type": "json_schema", "json_schema": schema}
             else:
-                response_format = {"type": "text"}
-
-            model_parameters["response_format"] = response_format
-
+                model_parameters["response_format"] = {"type": response_format}
 
         extra_model_kwargs = {}
@@ -922,11 +928,14 @@ def _num_tokens_from_messages(self, model: str, messages: list[PromptMessage],
                                   tools: Optional[list[PromptMessageTool]] = None) -> int:
         """Calculate num tokens for gpt-3.5-turbo and gpt-4 with tiktoken package.
-        Official documentation: https://github.com/openai/openai-cookbook/blob/
-        main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
+        Official documentation: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb"""
         if model.startswith('ft:'):
             model = model.split(':')[1]
 
+        # Currently, chatgpt-4o-latest shares gpt-4o's tokenizer, so count its tokens with gpt-4o's encoding.
+        if model == "chatgpt-4o-latest":
+            model = "gpt-4o"
+
         try:
             encoding = tiktoken.encoding_for_model(model)
         except KeyError:
@@ -946,7 +955,7 @@ def _num_tokens_from_messages(self, model: str, messages: list[PromptMessage],
             raise NotImplementedError(
                 f"get_num_tokens_from_messages() is not presently implemented "
                 f"for model {model}."
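For context, the `json_schema` plumbing above parses the user-supplied schema string and forwards it as `response_format={"type": "json_schema", "json_schema": ...}`. A minimal sketch of the same call against the `openai` SDK; the client setup and the example schema are illustrative, not taken from this repo:

```python
import json

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# In Dify this arrives as a free-form text parameter; it must parse as JSON.
json_schema_param = """
{
  "name": "person",
  "schema": {
    "type": "object",
    "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
    "required": ["name", "age"],
    "additionalProperties": false
  },
  "strict": true
}
"""

try:
    schema = json.loads(json_schema_param)
except json.JSONDecodeError as e:
    raise ValueError(f"invalid json_schema format: {json_schema_param}") from e

completion = client.chat.completions.create(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "Extract the person: Alice is 30."}],
    response_format={"type": "json_schema", "json_schema": schema},
)
print(completion.choices[0].message.content)  # JSON conforming to the schema
```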
- "See https://github.com/openai/openai-python/blob/main/chatml.md for " + "See https://platform.openai.com/docs/advanced-usage/managing-tokens for " "information on how messages are converted to tokens." ) num_tokens = 0 diff --git a/api/core/model_runtime/model_providers/upstage/llm/_position.yaml b/api/core/model_runtime/model_providers/upstage/llm/_position.yaml index d4f03e1988f8b..7992843dcb1d1 100644 --- a/api/core/model_runtime/model_providers/upstage/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/upstage/llm/_position.yaml @@ -1 +1 @@ -- soloar-1-mini-chat +- solar-1-mini-chat diff --git a/api/core/prompt/simple_prompt_transform.py b/api/core/prompt/simple_prompt_transform.py index 452b270348b2f..fd7ed0181be2f 100644 --- a/api/core/prompt/simple_prompt_transform.py +++ b/api/core/prompt/simple_prompt_transform.py @@ -1,11 +1,10 @@ import enum import json import os -from typing import Optional +from typing import TYPE_CHECKING, Optional from core.app.app_config.entities import PromptTemplateEntity from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity -from core.file.file_obj import FileVar from core.memory.token_buffer_memory import TokenBufferMemory from core.model_runtime.entities.message_entities import ( PromptMessage, @@ -18,6 +17,9 @@ from core.prompt.utils.prompt_template_parser import PromptTemplateParser from models.model import AppMode +if TYPE_CHECKING: + from core.file.file_obj import FileVar + class ModelMode(enum.Enum): COMPLETION = 'completion' @@ -50,7 +52,7 @@ def get_prompt(self, prompt_template_entity: PromptTemplateEntity, inputs: dict, query: str, - files: list[FileVar], + files: list["FileVar"], context: Optional[str], memory: Optional[TokenBufferMemory], model_config: ModelConfigWithCredentialsEntity) -> \ @@ -163,7 +165,7 @@ def _get_chat_model_prompt_messages(self, app_mode: AppMode, inputs: dict, query: str, context: Optional[str], - files: list[FileVar], + files: list["FileVar"], memory: Optional[TokenBufferMemory], model_config: ModelConfigWithCredentialsEntity) \ -> tuple[list[PromptMessage], Optional[list[str]]]: @@ -206,7 +208,7 @@ def _get_completion_model_prompt_messages(self, app_mode: AppMode, inputs: dict, query: str, context: Optional[str], - files: list[FileVar], + files: list["FileVar"], memory: Optional[TokenBufferMemory], model_config: ModelConfigWithCredentialsEntity) \ -> tuple[list[PromptMessage], Optional[list[str]]]: @@ -255,7 +257,7 @@ def _get_completion_model_prompt_messages(self, app_mode: AppMode, return [self.get_last_user_message(prompt, files)], stops - def get_last_user_message(self, prompt: str, files: list[FileVar]) -> UserPromptMessage: + def get_last_user_message(self, prompt: str, files: list["FileVar"]) -> UserPromptMessage: if files: prompt_message_contents = [TextPromptMessageContent(data=prompt)] for file in files: diff --git a/api/core/tools/provider/builtin/_positions.py b/api/core/tools/provider/builtin/_positions.py index ae806eaff4a03..062668fc5bf8b 100644 --- a/api/core/tools/provider/builtin/_positions.py +++ b/api/core/tools/provider/builtin/_positions.py @@ -1,6 +1,6 @@ import os.path -from core.helper.position_helper import get_position_map, sort_by_position_map +from core.helper.position_helper import get_tool_position_map, sort_by_position_map from core.tools.entities.api_entities import UserToolProvider @@ -10,11 +10,11 @@ class BuiltinToolProviderSort: @classmethod def sort(cls, providers: list[UserToolProvider]) -> list[UserToolProvider]: if not 
cls._position: - cls._position = get_position_map(os.path.join(os.path.dirname(__file__), '..')) + cls._position = get_tool_position_map(os.path.join(os.path.dirname(__file__), '..')) def name_func(provider: UserToolProvider) -> str: return provider.name sorted_providers = sort_by_position_map(cls._position, providers, name_func) - return sorted_providers \ No newline at end of file + return sorted_providers diff --git a/api/core/tools/tool/tool.py b/api/core/tools/tool/tool.py index 5d561911d1256..d990131b5fbbf 100644 --- a/api/core/tools/tool/tool.py +++ b/api/core/tools/tool/tool.py @@ -2,13 +2,12 @@ from collections.abc import Mapping from copy import deepcopy from enum import Enum -from typing import Any, Optional, Union +from typing import TYPE_CHECKING, Any, Optional, Union from pydantic import BaseModel, ConfigDict, field_validator from pydantic_core.core_schema import ValidationInfo from core.app.entities.app_invoke_entities import InvokeFrom -from core.file.file_obj import FileVar from core.tools.entities.tool_entities import ( ToolDescription, ToolIdentity, @@ -23,6 +22,9 @@ from core.tools.tool_file_manager import ToolFileManager from core.tools.utils.tool_parameter_converter import ToolParameterConverter +if TYPE_CHECKING: + from core.file.file_obj import FileVar + class Tool(BaseModel, ABC): identity: Optional[ToolIdentity] = None @@ -76,7 +78,7 @@ def fork_tool_runtime(self, runtime: dict[str, Any]) -> 'Tool': description=self.description.model_copy() if self.description else None, runtime=Tool.Runtime(**runtime), ) - + @abstractmethod def tool_provider_type(self) -> ToolProviderType: """ @@ -84,7 +86,7 @@ def tool_provider_type(self) -> ToolProviderType: :return: the tool provider type """ - + def load_variables(self, variables: ToolRuntimeVariablePool): """ load variables from database @@ -99,7 +101,7 @@ def set_image_variable(self, variable_name: str, image_key: str) -> None: """ if not self.variables: return - + self.variables.set_file(self.identity.name, variable_name, image_key) def set_text_variable(self, variable_name: str, text: str) -> None: @@ -108,9 +110,9 @@ def set_text_variable(self, variable_name: str, text: str) -> None: """ if not self.variables: return - + self.variables.set_text(self.identity.name, variable_name, text) - + def get_variable(self, name: Union[str, Enum]) -> Optional[ToolRuntimeVariable]: """ get a variable @@ -120,14 +122,14 @@ def get_variable(self, name: Union[str, Enum]) -> Optional[ToolRuntimeVariable]: """ if not self.variables: return None - + if isinstance(name, Enum): name = name.value - + for variable in self.variables.pool: if variable.name == name: return variable - + return None def get_default_image_variable(self) -> Optional[ToolRuntimeVariable]: @@ -138,9 +140,9 @@ def get_default_image_variable(self) -> Optional[ToolRuntimeVariable]: """ if not self.variables: return None - + return self.get_variable(self.VARIABLE_KEY.IMAGE) - + def get_variable_file(self, name: Union[str, Enum]) -> Optional[bytes]: """ get a variable file @@ -151,7 +153,7 @@ def get_variable_file(self, name: Union[str, Enum]) -> Optional[bytes]: variable = self.get_variable(name) if not variable: return None - + if not isinstance(variable, ToolRuntimeImageVariable): return None @@ -160,9 +162,9 @@ def get_variable_file(self, name: Union[str, Enum]) -> Optional[bytes]: file_binary = ToolFileManager.get_file_binary_by_message_file_id(message_file_id) if not file_binary: return None - + return file_binary[0] - + def list_variables(self) -> 
list[ToolRuntimeVariable]: """ list all variables @@ -171,9 +173,9 @@ def list_variables(self) -> list[ToolRuntimeVariable]: """ if not self.variables: return [] - + return self.variables.pool - + def list_default_image_variables(self) -> list[ToolRuntimeVariable]: """ list all image variables @@ -182,9 +184,9 @@ def list_default_image_variables(self) -> list[ToolRuntimeVariable]: """ if not self.variables: return [] - + result = [] - + for variable in self.variables.pool: if variable.name.startswith(self.VARIABLE_KEY.IMAGE.value): result.append(variable) @@ -225,7 +227,7 @@ def _transform_tool_parameters_type(self, tool_parameters: Mapping[str, Any]) -> @abstractmethod def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: pass - + def validate_credentials(self, credentials: dict[str, Any], parameters: dict[str, Any]) -> None: """ validate the credentials @@ -244,7 +246,7 @@ def get_runtime_parameters(self) -> list[ToolParameter]: :return: the runtime parameters """ return self.parameters or [] - + def get_all_runtime_parameters(self) -> list[ToolParameter]: """ get all runtime parameters @@ -278,7 +280,7 @@ def get_all_runtime_parameters(self) -> list[ToolParameter]: parameters.append(parameter) return parameters - + def create_image_message(self, image: str, save_as: str = '') -> ToolInvokeMessage: """ create an image message @@ -286,18 +288,18 @@ def create_image_message(self, image: str, save_as: str = '') -> ToolInvokeMessa :param image: the url of the image :return: the image message """ - return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE, - message=image, + return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE, + message=image, save_as=save_as) - - def create_file_var_message(self, file_var: FileVar) -> ToolInvokeMessage: + + def create_file_var_message(self, file_var: "FileVar") -> ToolInvokeMessage: return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.FILE_VAR, message='', meta={ 'file_var': file_var }, save_as='') - + def create_link_message(self, link: str, save_as: str = '') -> ToolInvokeMessage: """ create a link message @@ -305,10 +307,10 @@ def create_link_message(self, link: str, save_as: str = '') -> ToolInvokeMessage :param link: the url of the link :return: the link message """ - return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.LINK, - message=link, + return ToolInvokeMessage(type=ToolInvokeMessage.MessageType.LINK, + message=link, save_as=save_as) - + def create_text_message(self, text: str, save_as: str = '') -> ToolInvokeMessage: """ create a text message @@ -321,7 +323,7 @@ def create_text_message(self, text: str, save_as: str = '') -> ToolInvokeMessage message=text, save_as=save_as ) - + def create_blob_message(self, blob: bytes, meta: dict = None, save_as: str = '') -> ToolInvokeMessage: """ create a blob message diff --git a/api/core/tools/utils/message_transformer.py b/api/core/tools/utils/message_transformer.py index ef9e5b67ae2ab..564b9d3e14c15 100644 --- a/api/core/tools/utils/message_transformer.py +++ b/api/core/tools/utils/message_transformer.py @@ -1,7 +1,7 @@ import logging from mimetypes import guess_extension -from core.file.file_obj import FileTransferMethod, FileType, FileVar +from core.file.file_obj import FileTransferMethod, FileType from core.tools.entities.tool_entities import ToolInvokeMessage from core.tools.tool_file_manager import ToolFileManager @@ -27,12 +27,12 @@ def transform_tool_invoke_messages(cls, messages: 
list[ToolInvokeMessage], # try to download image try: file = ToolFileManager.create_file_by_url( - user_id=user_id, + user_id=user_id, tenant_id=tenant_id, conversation_id=conversation_id, file_url=message.message ) - + url = f'/files/tools/{file.id}{guess_extension(file.mimetype) or ".png"}' result.append(ToolInvokeMessage( @@ -55,14 +55,14 @@ def transform_tool_invoke_messages(cls, messages: list[ToolInvokeMessage], # if message is str, encode it to bytes if isinstance(message.message, str): message.message = message.message.encode('utf-8') - + file = ToolFileManager.create_file_by_raw( user_id=user_id, tenant_id=tenant_id, conversation_id=conversation_id, file_binary=message.message, mimetype=mimetype ) - + url = cls.get_tool_file_url(file.id, guess_extension(file.mimetype)) # check if file is image @@ -81,7 +81,7 @@ def transform_tool_invoke_messages(cls, messages: list[ToolInvokeMessage], meta=message.meta.copy() if message.meta is not None else {}, )) elif message.type == ToolInvokeMessage.MessageType.FILE_VAR: - file_var: FileVar = message.meta.get('file_var') + file_var = message.meta.get('file_var') if file_var: if file_var.transfer_method == FileTransferMethod.TOOL_FILE: url = cls.get_tool_file_url(file_var.related_id, file_var.extension) @@ -103,7 +103,7 @@ def transform_tool_invoke_messages(cls, messages: list[ToolInvokeMessage], result.append(message) return result - + @classmethod def get_tool_file_url(cls, tool_file_id: str, extension: str) -> str: - return f'/files/tools/{tool_file_id}{extension or ".bin"}' \ No newline at end of file + return f'/files/tools/{tool_file_id}{extension or ".bin"}' diff --git a/api/core/workflow/entities/node_entities.py b/api/core/workflow/entities/node_entities.py index 0978b09b94369..025453567bfc1 100644 --- a/api/core/workflow/entities/node_entities.py +++ b/api/core/workflow/entities/node_entities.py @@ -4,13 +4,14 @@ from pydantic import BaseModel -from models.workflow import WorkflowNodeExecutionStatus +from models import WorkflowNodeExecutionStatus class NodeType(Enum): """ Node Types. """ + START = 'start' END = 'end' ANSWER = 'answer' @@ -44,33 +45,11 @@ def value_of(cls, value: str) -> 'NodeType': raise ValueError(f'invalid node type value {value}') -class SystemVariable(Enum): - """ - System Variables. - """ - QUERY = 'query' - FILES = 'files' - CONVERSATION_ID = 'conversation_id' - USER_ID = 'user_id' - - @classmethod - def value_of(cls, value: str) -> 'SystemVariable': - """ - Get value of given system variable. - - :param value: system variable value - :return: system variable - """ - for system_variable in cls: - if system_variable.value == value: - return system_variable - raise ValueError(f'invalid system variable value {value}') - - class NodeRunMetadataKey(Enum): """ Node Run Metadata Key. """ + TOTAL_TOKENS = 'total_tokens' TOTAL_PRICE = 'total_price' CURRENCY = 'currency' @@ -83,6 +62,7 @@ class NodeRunResult(BaseModel): """ Node Run Result. 
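The `SystemVariable` enum removed from `node_entities.py` here reappears below in the new `api/core/workflow/enums.py`, now subclassing `str`. A quick standalone sketch of what the `str` mixin buys; the assertions reflect standard `enum` behavior, not repo code:

```python
import json
from enum import Enum


class SystemVariable(str, Enum):
    QUERY = "query"
    FILES = "files"
    CONVERSATION_ID = "conversation_id"


# Members behave as plain strings in comparisons and concatenation:
assert SystemVariable.QUERY == "query"
assert "sys." + SystemVariable.QUERY == "sys.query"
# They can be recovered from a raw value, as value_of() does:
assert SystemVariable("query") is SystemVariable.QUERY
# And they JSON-serialize as their string values:
print(json.dumps(SystemVariable.FILES))  # "files"
```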
""" + status: WorkflowNodeExecutionStatus = WorkflowNodeExecutionStatus.RUNNING inputs: Optional[Mapping[str, Any]] = None # node inputs diff --git a/api/core/workflow/entities/variable_pool.py b/api/core/workflow/entities/variable_pool.py index a96a26f794db8..9fe3356faa2ef 100644 --- a/api/core/workflow/entities/variable_pool.py +++ b/api/core/workflow/entities/variable_pool.py @@ -6,7 +6,7 @@ from core.app.segments import Segment, Variable, factory from core.file.file_obj import FileVar -from core.workflow.entities.node_entities import SystemVariable +from core.workflow.enums import SystemVariable VariableValue = Union[str, int, float, dict, list, FileVar] diff --git a/api/core/workflow/enums.py b/api/core/workflow/enums.py new file mode 100644 index 0000000000000..4757cf32f8898 --- /dev/null +++ b/api/core/workflow/enums.py @@ -0,0 +1,25 @@ +from enum import Enum + + +class SystemVariable(str, Enum): + """ + System Variables. + """ + QUERY = 'query' + FILES = 'files' + CONVERSATION_ID = 'conversation_id' + USER_ID = 'user_id' + DIALOGUE_COUNT = 'dialogue_count' + + @classmethod + def value_of(cls, value: str): + """ + Get value of given system variable. + + :param value: system variable value + :return: system variable + """ + for system_variable in cls: + if system_variable.value == value: + return system_variable + raise ValueError(f'invalid system variable value {value}') diff --git a/api/core/workflow/nodes/llm/llm_node.py b/api/core/workflow/nodes/llm/llm_node.py index 4431259a57543..c20e0d45062f5 100644 --- a/api/core/workflow/nodes/llm/llm_node.py +++ b/api/core/workflow/nodes/llm/llm_node.py @@ -1,14 +1,13 @@ import json from collections.abc import Generator from copy import deepcopy -from typing import Optional, cast +from typing import TYPE_CHECKING, Optional, cast from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity from core.app.entities.queue_entities import QueueRetrieverResourcesEvent from core.entities.model_entities import ModelStatus from core.entities.provider_entities import QuotaUnit from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError -from core.file.file_obj import FileVar from core.memory.token_buffer_memory import TokenBufferMemory from core.model_manager import ModelInstance, ModelManager from core.model_runtime.entities.llm_entities import LLMUsage @@ -23,8 +22,9 @@ from core.prompt.advanced_prompt_transform import AdvancedPromptTransform from core.prompt.entities.advanced_prompt_entities import CompletionModelPromptTemplate, MemoryConfig from core.prompt.utils.prompt_message_util import PromptMessageUtil -from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeRunResult, NodeType, SystemVariable +from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeRunResult, NodeType from core.workflow.entities.variable_pool import VariablePool +from core.workflow.enums import SystemVariable from core.workflow.nodes.base_node import BaseNode from core.workflow.nodes.llm.entities import ( LLMNodeChatModelMessage, @@ -38,6 +38,10 @@ from models.provider import Provider, ProviderType from models.workflow import WorkflowNodeExecutionStatus +if TYPE_CHECKING: + from core.file.file_obj import FileVar + + class LLMNode(BaseNode): _node_data_cls = LLMNodeData @@ -70,7 +74,7 @@ def _run(self, variable_pool: VariablePool) -> NodeRunResult: node_inputs = {} # fetch files - files: list[FileVar] = self._fetch_files(node_data, variable_pool) + files = 
self._fetch_files(node_data, variable_pool) if files: node_inputs['#files#'] = [file.to_dict() for file in files] @@ -201,8 +205,8 @@ def _handle_invoke_result(self, invoke_result: Generator) -> tuple[str, LLMUsage usage = LLMUsage.empty_usage() return full_text, usage - - def _transform_chat_messages(self, + + def _transform_chat_messages(self, messages: list[LLMNodeChatModelMessage] | LLMNodeCompletionModelPromptTemplate ) -> list[LLMNodeChatModelMessage] | LLMNodeCompletionModelPromptTemplate: """ @@ -249,13 +253,13 @@ def parse_dict(d: dict) -> str: # check if it's a context structure if 'metadata' in d and '_source' in d['metadata'] and 'content' in d: return d['content'] - + # else, parse the dict try: return json.dumps(d, ensure_ascii=False) except Exception: return str(d) - + if isinstance(value, str): value = value elif isinstance(value, list): @@ -321,7 +325,7 @@ def _fetch_inputs(self, node_data: LLMNodeData, variable_pool: VariablePool) -> return inputs - def _fetch_files(self, node_data: LLMNodeData, variable_pool: VariablePool) -> list[FileVar]: + def _fetch_files(self, node_data: LLMNodeData, variable_pool: VariablePool) -> list["FileVar"]: """ Fetch files :param node_data: node data @@ -520,7 +524,7 @@ def _fetch_prompt_messages(self, node_data: LLMNodeData, query: Optional[str], query_prompt_template: Optional[str], inputs: dict[str, str], - files: list[FileVar], + files: list["FileVar"], context: Optional[str], memory: Optional[TokenBufferMemory], model_config: ModelConfigWithCredentialsEntity) \ diff --git a/api/core/workflow/nodes/tool/tool_node.py b/api/core/workflow/nodes/tool/tool_node.py index 87bfa5beae880..554e3b6074ed5 100644 --- a/api/core/workflow/nodes/tool/tool_node.py +++ b/api/core/workflow/nodes/tool/tool_node.py @@ -2,19 +2,20 @@ from os import path from typing import Any, cast -from core.app.segments import parser +from core.app.segments import ArrayAnyVariable, parser from core.callback_handler.workflow_tool_callback_handler import DifyWorkflowCallbackHandler from core.file.file_obj import FileTransferMethod, FileType, FileVar from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter from core.tools.tool_engine import ToolEngine from core.tools.tool_manager import ToolManager from core.tools.utils.message_transformer import ToolFileMessageTransformer -from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeRunResult, NodeType, SystemVariable +from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeRunResult, NodeType from core.workflow.entities.variable_pool import VariablePool +from core.workflow.enums import SystemVariable from core.workflow.nodes.base_node import BaseNode from core.workflow.nodes.tool.entities import ToolNodeData from core.workflow.utils.variable_template_parser import VariableTemplateParser -from models.workflow import WorkflowNodeExecutionStatus +from models import WorkflowNodeExecutionStatus class ToolNode(BaseNode): @@ -140,9 +141,9 @@ def _generate_parameters( return result def _fetch_files(self, variable_pool: VariablePool) -> list[FileVar]: - # FIXME: ensure this is a ArrayVariable contains FileVariable. 
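Several files in this diff (`base_app_runner.py`, `simple_prompt_transform.py`, `tool.py`, `llm_node.py`) apply the same refactor: import `FileVar` only under `typing.TYPE_CHECKING` and quote the annotations, which removes a runtime import cycle while keeping type coverage. A minimal sketch of the pattern; the helper function is hypothetical:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only evaluated by static type checkers (mypy, pyright), never at
    # runtime, so this module no longer imports core.file.file_obj on load.
    from core.file.file_obj import FileVar


def describe_files(files: list["FileVar"]) -> str:
    # The quoted annotation stays a string at runtime; the objects passed in
    # are real FileVar instances, so normal attribute access still works.
    return f"{len(files)} file(s) attached"
```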
variable = variable_pool.get(['sys', SystemVariable.FILES.value]) - return [file_var.value for file_var in variable.value] if variable else [] + assert isinstance(variable, ArrayAnyVariable) + return list(variable.value) if variable else [] def _convert_tool_messages(self, messages: list[ToolInvokeMessage]): """ diff --git a/api/core/workflow/workflow_engine_manager.py b/api/core/workflow/workflow_engine_manager.py index f299f84efb3ee..3157eedfee523 100644 --- a/api/core/workflow/workflow_engine_manager.py +++ b/api/core/workflow/workflow_engine_manager.py @@ -3,6 +3,7 @@ from collections.abc import Mapping, Sequence from typing import Any, Optional, cast +import contexts from configs import dify_config from core.app.apps.base_app_queue_manager import GenerateTaskStoppedException from core.app.entities.app_invoke_entities import InvokeFrom @@ -97,16 +98,16 @@ def run_workflow( invoke_from: InvokeFrom, callbacks: Sequence[WorkflowCallback], call_depth: int = 0, - variable_pool: VariablePool, + variable_pool: VariablePool | None = None, ) -> None: """ :param workflow: Workflow instance :param user_id: user id :param user_from: user from - :param user_inputs: user variables inputs - :param system_inputs: system inputs, like: query, files + :param invoke_from: invoke from :param callbacks: workflow callbacks :param call_depth: call depth + :param variable_pool: variable pool """ # fetch workflow graph graph = workflow.graph_dict @@ -128,6 +129,8 @@ def run_workflow( raise ValueError('Max workflow call depth {} reached.'.format(workflow_call_max_depth)) # init workflow run state + if not variable_pool: + variable_pool = contexts.workflow_variable_pool.get() workflow_run_state = WorkflowRunState( workflow=workflow, start_at=time.perf_counter(), diff --git a/api/events/app_event.py b/api/events/app_event.py index 67a5982527f7b..f2ce71bbbb363 100644 --- a/api/events/app_event.py +++ b/api/events/app_event.py @@ -1,13 +1,13 @@ from blinker import signal # sender: app -app_was_created = signal('app-was-created') +app_was_created = signal("app-was-created") # sender: app, kwargs: app_model_config -app_model_config_was_updated = signal('app-model-config-was-updated') +app_model_config_was_updated = signal("app-model-config-was-updated") # sender: app, kwargs: published_workflow -app_published_workflow_was_updated = signal('app-published-workflow-was-updated') +app_published_workflow_was_updated = signal("app-published-workflow-was-updated") # sender: app, kwargs: synced_draft_workflow -app_draft_workflow_was_synced = signal('app-draft-workflow-was-synced') +app_draft_workflow_was_synced = signal("app-draft-workflow-was-synced") diff --git a/api/events/dataset_event.py b/api/events/dataset_event.py index d4a2b6f313c13..750b7424e2347 100644 --- a/api/events/dataset_event.py +++ b/api/events/dataset_event.py @@ -1,4 +1,4 @@ from blinker import signal # sender: dataset -dataset_was_deleted = signal('dataset-was-deleted') +dataset_was_deleted = signal("dataset-was-deleted") diff --git a/api/events/document_event.py b/api/events/document_event.py index f95326630b2b7..2c5a416a5e0c9 100644 --- a/api/events/document_event.py +++ b/api/events/document_event.py @@ -1,4 +1,4 @@ from blinker import signal # sender: document -document_was_deleted = signal('document-was-deleted') +document_was_deleted = signal("document-was-deleted") diff --git a/api/events/event_handlers/clean_when_dataset_deleted.py b/api/events/event_handlers/clean_when_dataset_deleted.py index 42f1c70614c49..7caa2d1cc9f3f 100644 --- 
a/api/events/event_handlers/clean_when_dataset_deleted.py +++ b/api/events/event_handlers/clean_when_dataset_deleted.py @@ -5,5 +5,11 @@ @dataset_was_deleted.connect def handle(sender, **kwargs): dataset = sender - clean_dataset_task.delay(dataset.id, dataset.tenant_id, dataset.indexing_technique, - dataset.index_struct, dataset.collection_binding_id, dataset.doc_form) + clean_dataset_task.delay( + dataset.id, + dataset.tenant_id, + dataset.indexing_technique, + dataset.index_struct, + dataset.collection_binding_id, + dataset.doc_form, + ) diff --git a/api/events/event_handlers/clean_when_document_deleted.py b/api/events/event_handlers/clean_when_document_deleted.py index 24022da15f81e..00a66f50ad931 100644 --- a/api/events/event_handlers/clean_when_document_deleted.py +++ b/api/events/event_handlers/clean_when_document_deleted.py @@ -5,7 +5,7 @@ @document_was_deleted.connect def handle(sender, **kwargs): document_id = sender - dataset_id = kwargs.get('dataset_id') - doc_form = kwargs.get('doc_form') - file_id = kwargs.get('file_id') + dataset_id = kwargs.get("dataset_id") + doc_form = kwargs.get("doc_form") + file_id = kwargs.get("file_id") clean_document_task.delay(document_id, dataset_id, doc_form, file_id) diff --git a/api/events/event_handlers/create_document_index.py b/api/events/event_handlers/create_document_index.py index 68dae5a5537cd..72a135e73d4ca 100644 --- a/api/events/event_handlers/create_document_index.py +++ b/api/events/event_handlers/create_document_index.py @@ -14,21 +14,25 @@ @document_index_created.connect def handle(sender, **kwargs): dataset_id = sender - document_ids = kwargs.get('document_ids', None) + document_ids = kwargs.get("document_ids", None) documents = [] start_at = time.perf_counter() for document_id in document_ids: - logging.info(click.style('Start process document: {}'.format(document_id), fg='green')) + logging.info(click.style("Start process document: {}".format(document_id), fg="green")) - document = db.session.query(Document).filter( - Document.id == document_id, - Document.dataset_id == dataset_id - ).first() + document = ( + db.session.query(Document) + .filter( + Document.id == document_id, + Document.dataset_id == dataset_id, + ) + .first() + ) if not document: - raise NotFound('Document not found') + raise NotFound("Document not found") - document.indexing_status = 'parsing' + document.indexing_status = "parsing" document.processing_started_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None) documents.append(document) db.session.add(document) @@ -38,8 +42,8 @@ def handle(sender, **kwargs): indexing_runner = IndexingRunner() indexing_runner.run(documents) end_at = time.perf_counter() - logging.info(click.style('Processed dataset: {} latency: {}'.format(dataset_id, end_at - start_at), fg='green')) + logging.info(click.style("Processed dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green")) except DocumentIsPausedException as ex: - logging.info(click.style(str(ex), fg='yellow')) + logging.info(click.style(str(ex), fg="yellow")) except Exception: pass diff --git a/api/events/event_handlers/create_installed_app_when_app_created.py b/api/events/event_handlers/create_installed_app_when_app_created.py index 31084ce0fe8bd..57412cc4ad0af 100644 --- a/api/events/event_handlers/create_installed_app_when_app_created.py +++ b/api/events/event_handlers/create_installed_app_when_app_created.py @@ -10,7 +10,7 @@ def handle(sender, **kwargs): installed_app = InstalledApp( tenant_id=app.tenant_id, app_id=app.id, - 
app_owner_tenant_id=app.tenant_id + app_owner_tenant_id=app.tenant_id, ) db.session.add(installed_app) db.session.commit() diff --git a/api/events/event_handlers/create_site_record_when_app_created.py b/api/events/event_handlers/create_site_record_when_app_created.py index f0eb7159b631e..abaf0e41ec30e 100644 --- a/api/events/event_handlers/create_site_record_when_app_created.py +++ b/api/events/event_handlers/create_site_record_when_app_created.py @@ -7,15 +7,15 @@ def handle(sender, **kwargs): """Create site record when an app is created.""" app = sender - account = kwargs.get('account') + account = kwargs.get("account") site = Site( app_id=app.id, title=app.name, - icon = app.icon, - icon_background = app.icon_background, + icon=app.icon, + icon_background=app.icon_background, default_language=account.interface_language, - customize_token_strategy='not_allow', - code=Site.generate_code(16) + customize_token_strategy="not_allow", + code=Site.generate_code(16), ) db.session.add(site) diff --git a/api/events/event_handlers/deduct_quota_when_messaeg_created.py b/api/events/event_handlers/deduct_quota_when_messaeg_created.py index 8cf52bf8f5d0b..843a2320968ce 100644 --- a/api/events/event_handlers/deduct_quota_when_messaeg_created.py +++ b/api/events/event_handlers/deduct_quota_when_messaeg_created.py @@ -8,7 +8,7 @@ @message_was_created.connect def handle(sender, **kwargs): message = sender - application_generate_entity = kwargs.get('application_generate_entity') + application_generate_entity = kwargs.get("application_generate_entity") if not isinstance(application_generate_entity, ChatAppGenerateEntity | AgentChatAppGenerateEntity): return @@ -39,7 +39,7 @@ def handle(sender, **kwargs): elif quota_unit == QuotaUnit.CREDITS: used_quota = 1 - if 'gpt-4' in model_config.model: + if "gpt-4" in model_config.model: used_quota = 20 else: used_quota = 1 @@ -50,6 +50,6 @@ def handle(sender, **kwargs): Provider.provider_name == model_config.provider, Provider.provider_type == ProviderType.SYSTEM.value, Provider.quota_type == system_configuration.current_quota_type.value, - Provider.quota_limit > Provider.quota_used - ).update({'quota_used': Provider.quota_used + used_quota}) + Provider.quota_limit > Provider.quota_used, + ).update({"quota_used": Provider.quota_used + used_quota}) db.session.commit() diff --git a/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py b/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py index 1f6da34ee24d5..f96bb5ef74b62 100644 --- a/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py +++ b/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py @@ -8,8 +8,8 @@ @app_draft_workflow_was_synced.connect def handle(sender, **kwargs): app = sender - for node_data in kwargs.get('synced_draft_workflow').graph_dict.get('nodes', []): - if node_data.get('data', {}).get('type') == NodeType.TOOL.value: + for node_data in kwargs.get("synced_draft_workflow").graph_dict.get("nodes", []): + if node_data.get("data", {}).get("type") == NodeType.TOOL.value: try: tool_entity = ToolEntity(**node_data["data"]) tool_runtime = ToolManager.get_tool_runtime( @@ -23,7 +23,7 @@ def handle(sender, **kwargs): tool_runtime=tool_runtime, provider_name=tool_entity.provider_name, provider_type=tool_entity.provider_type, - identity_id=f'WORKFLOW.{app.id}.{node_data.get("id")}' + identity_id=f'WORKFLOW.{app.id}.{node_data.get("id")}', ) manager.delete_tool_parameters_cache() except: diff 
--git a/api/events/event_handlers/document_index_event.py b/api/events/event_handlers/document_index_event.py index 9c4e055debdd9..3d463fe5b35ac 100644 --- a/api/events/event_handlers/document_index_event.py +++ b/api/events/event_handlers/document_index_event.py @@ -1,4 +1,4 @@ from blinker import signal # sender: document -document_index_created = signal('document-index-created') +document_index_created = signal("document-index-created") diff --git a/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py b/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py index 2b202c53d0b88..59375b1a0b1a8 100644 --- a/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py +++ b/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py @@ -7,13 +7,11 @@ @app_model_config_was_updated.connect def handle(sender, **kwargs): app = sender - app_model_config = kwargs.get('app_model_config') + app_model_config = kwargs.get("app_model_config") dataset_ids = get_dataset_ids_from_model_config(app_model_config) - app_dataset_joins = db.session.query(AppDatasetJoin).filter( - AppDatasetJoin.app_id == app.id - ).all() + app_dataset_joins = db.session.query(AppDatasetJoin).filter(AppDatasetJoin.app_id == app.id).all() removed_dataset_ids = [] if not app_dataset_joins: @@ -29,16 +27,12 @@ def handle(sender, **kwargs): if removed_dataset_ids: for dataset_id in removed_dataset_ids: db.session.query(AppDatasetJoin).filter( - AppDatasetJoin.app_id == app.id, - AppDatasetJoin.dataset_id == dataset_id + AppDatasetJoin.app_id == app.id, AppDatasetJoin.dataset_id == dataset_id ).delete() if added_dataset_ids: for dataset_id in added_dataset_ids: - app_dataset_join = AppDatasetJoin( - app_id=app.id, - dataset_id=dataset_id - ) + app_dataset_join = AppDatasetJoin(app_id=app.id, dataset_id=dataset_id) db.session.add(app_dataset_join) db.session.commit() @@ -51,7 +45,7 @@ def get_dataset_ids_from_model_config(app_model_config: AppModelConfig) -> set: agent_mode = app_model_config.agent_mode_dict - tools = agent_mode.get('tools', []) or [] + tools = agent_mode.get("tools", []) or [] for tool in tools: if len(list(tool.keys())) != 1: continue @@ -63,11 +57,11 @@ def get_dataset_ids_from_model_config(app_model_config: AppModelConfig) -> set: # get dataset from dataset_configs dataset_configs = app_model_config.dataset_configs_dict - datasets = dataset_configs.get('datasets', {}) or {} - for dataset in datasets.get('datasets', []) or []: + datasets = dataset_configs.get("datasets", {}) or {} + for dataset in datasets.get("datasets", []) or []: keys = list(dataset.keys()) - if len(keys) == 1 and keys[0] == 'dataset': - if dataset['dataset'].get('id'): - dataset_ids.add(dataset['dataset'].get('id')) + if len(keys) == 1 and keys[0] == "dataset": + if dataset["dataset"].get("id"): + dataset_ids.add(dataset["dataset"].get("id")) return dataset_ids diff --git a/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py b/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py index 996b1e96910b9..333b85ecb2907 100644 --- a/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py +++ b/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py @@ -11,13 +11,11 @@ @app_published_workflow_was_updated.connect def handle(sender, **kwargs): app = sender - published_workflow = kwargs.get('published_workflow') + 
published_workflow = kwargs.get("published_workflow") published_workflow = cast(Workflow, published_workflow) dataset_ids = get_dataset_ids_from_workflow(published_workflow) - app_dataset_joins = db.session.query(AppDatasetJoin).filter( - AppDatasetJoin.app_id == app.id - ).all() + app_dataset_joins = db.session.query(AppDatasetJoin).filter(AppDatasetJoin.app_id == app.id).all() removed_dataset_ids = [] if not app_dataset_joins: @@ -33,16 +31,12 @@ def handle(sender, **kwargs): if removed_dataset_ids: for dataset_id in removed_dataset_ids: db.session.query(AppDatasetJoin).filter( - AppDatasetJoin.app_id == app.id, - AppDatasetJoin.dataset_id == dataset_id + AppDatasetJoin.app_id == app.id, AppDatasetJoin.dataset_id == dataset_id ).delete() if added_dataset_ids: for dataset_id in added_dataset_ids: - app_dataset_join = AppDatasetJoin( - app_id=app.id, - dataset_id=dataset_id - ) + app_dataset_join = AppDatasetJoin(app_id=app.id, dataset_id=dataset_id) db.session.add(app_dataset_join) db.session.commit() @@ -54,18 +48,19 @@ def get_dataset_ids_from_workflow(published_workflow: Workflow) -> set: if not graph: return dataset_ids - nodes = graph.get('nodes', []) + nodes = graph.get("nodes", []) # fetch all knowledge retrieval nodes - knowledge_retrieval_nodes = [node for node in nodes - if node.get('data', {}).get('type') == NodeType.KNOWLEDGE_RETRIEVAL.value] + knowledge_retrieval_nodes = [ + node for node in nodes if node.get("data", {}).get("type") == NodeType.KNOWLEDGE_RETRIEVAL.value + ] if not knowledge_retrieval_nodes: return dataset_ids for node in knowledge_retrieval_nodes: try: - node_data = KnowledgeRetrievalNodeData(**node.get('data', {})) + node_data = KnowledgeRetrievalNodeData(**node.get("data", {})) dataset_ids.update(node_data.dataset_ids) except Exception as e: continue diff --git a/api/events/event_handlers/update_provider_last_used_at_when_messaeg_created.py b/api/events/event_handlers/update_provider_last_used_at_when_messaeg_created.py index 6188f1a0850ac..a80572c0debb1 100644 --- a/api/events/event_handlers/update_provider_last_used_at_when_messaeg_created.py +++ b/api/events/event_handlers/update_provider_last_used_at_when_messaeg_created.py @@ -9,13 +9,13 @@ @message_was_created.connect def handle(sender, **kwargs): message = sender - application_generate_entity = kwargs.get('application_generate_entity') + application_generate_entity = kwargs.get("application_generate_entity") if not isinstance(application_generate_entity, ChatAppGenerateEntity | AgentChatAppGenerateEntity): return db.session.query(Provider).filter( Provider.tenant_id == application_generate_entity.app_config.tenant_id, - Provider.provider_name == application_generate_entity.model_conf.provider - ).update({'last_used': datetime.now(timezone.utc).replace(tzinfo=None)}) + Provider.provider_name == application_generate_entity.model_conf.provider, + ).update({"last_used": datetime.now(timezone.utc).replace(tzinfo=None)}) db.session.commit() diff --git a/api/events/message_event.py b/api/events/message_event.py index 21da83f2496af..6576c35c453c9 100644 --- a/api/events/message_event.py +++ b/api/events/message_event.py @@ -1,4 +1,4 @@ from blinker import signal # sender: message, kwargs: conversation -message_was_created = signal('message-was-created') +message_was_created = signal("message-was-created") diff --git a/api/events/tenant_event.py b/api/events/tenant_event.py index 942f709917bf6..d99feaac40896 100644 --- a/api/events/tenant_event.py +++ b/api/events/tenant_event.py @@ -1,7 +1,7 @@ from blinker 
import signal # sender: tenant -tenant_was_created = signal('tenant-was-created') +tenant_was_created = signal("tenant-was-created") # sender: tenant -tenant_was_updated = signal('tenant-was-updated') +tenant_was_updated = signal("tenant-was-updated") diff --git a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index ae9a07534084e..f5ec7c1759cb9 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -17,7 +17,7 @@ def __call__(self, *args: object, **kwargs: object) -> object: backend=app.config["CELERY_BACKEND"], task_ignore_result=True, ) - + # Add SSL options to the Celery configuration ssl_options = { "ssl_cert_reqs": None, @@ -35,7 +35,7 @@ def __call__(self, *args: object, **kwargs: object) -> object: celery_app.conf.update( broker_use_ssl=ssl_options, # Add the SSL options to the broker configuration ) - + celery_app.set_default() app.extensions["celery"] = celery_app @@ -45,18 +45,15 @@ def __call__(self, *args: object, **kwargs: object) -> object: ] day = app.config["CELERY_BEAT_SCHEDULER_TIME"] beat_schedule = { - 'clean_embedding_cache_task': { - 'task': 'schedule.clean_embedding_cache_task.clean_embedding_cache_task', - 'schedule': timedelta(days=day), + "clean_embedding_cache_task": { + "task": "schedule.clean_embedding_cache_task.clean_embedding_cache_task", + "schedule": timedelta(days=day), + }, + "clean_unused_datasets_task": { + "task": "schedule.clean_unused_datasets_task.clean_unused_datasets_task", + "schedule": timedelta(days=day), }, - 'clean_unused_datasets_task': { - 'task': 'schedule.clean_unused_datasets_task.clean_unused_datasets_task', - 'schedule': timedelta(days=day), - } } - celery_app.conf.update( - beat_schedule=beat_schedule, - imports=imports - ) + celery_app.conf.update(beat_schedule=beat_schedule, imports=imports) return celery_app diff --git a/api/extensions/ext_compress.py b/api/extensions/ext_compress.py index 1dbaffcfb0dc2..38e67749fcc99 100644 --- a/api/extensions/ext_compress.py +++ b/api/extensions/ext_compress.py @@ -2,15 +2,14 @@ def init_app(app: Flask): - if app.config.get('API_COMPRESSION_ENABLED'): + if app.config.get("API_COMPRESSION_ENABLED"): from flask_compress import Compress - app.config['COMPRESS_MIMETYPES'] = [ - 'application/json', - 'image/svg+xml', - 'text/html', + app.config["COMPRESS_MIMETYPES"] = [ + "application/json", + "image/svg+xml", + "text/html", ] compress = Compress() compress.init_app(app) - diff --git a/api/extensions/ext_database.py b/api/extensions/ext_database.py index c248e173a252c..f6ffa536343af 100644 --- a/api/extensions/ext_database.py +++ b/api/extensions/ext_database.py @@ -2,11 +2,11 @@ from sqlalchemy import MetaData POSTGRES_INDEXES_NAMING_CONVENTION = { - 'ix': '%(column_0_label)s_idx', - 'uq': '%(table_name)s_%(column_0_name)s_key', - 'ck': '%(table_name)s_%(constraint_name)s_check', - 'fk': '%(table_name)s_%(column_0_name)s_fkey', - 'pk': '%(table_name)s_pkey', + "ix": "%(column_0_label)s_idx", + "uq": "%(table_name)s_%(column_0_name)s_key", + "ck": "%(table_name)s_%(constraint_name)s_check", + "fk": "%(table_name)s_%(column_0_name)s_fkey", + "pk": "%(table_name)s_pkey", } metadata = MetaData(naming_convention=POSTGRES_INDEXES_NAMING_CONVENTION) diff --git a/api/extensions/ext_mail.py b/api/extensions/ext_mail.py index ec3a5cc112b87..b435294abc23b 100644 --- a/api/extensions/ext_mail.py +++ b/api/extensions/ext_mail.py @@ -14,67 +14,69 @@ def is_inited(self) -> bool: return self._client is not None def init_app(self, app: Flask): - if app.config.get('MAIL_TYPE'): 
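# ---------------------------------------------------------------------------
# [Editor's aside -- illustrative sketch only, not part of the diff under
# review.] The event modules reformatted above (document_index_event.py,
# message_event.py, tenant_event.py) all share one blinker pattern: a
# module-level signal plus handlers registered with "@signal.connect". A
# minimal, self-contained version of that pattern; the sender value and the
# "account" kwarg below are invented examples, not repo data:

from blinker import signal

tenant_was_created = signal("tenant-was-created")

@tenant_was_created.connect
def handle(sender, **kwargs):
    # blinker passes the sender positionally; any extra context arrives as
    # keyword arguments, which is why the handlers in this diff read values
    # with kwargs.get("...").
    print("tenant created:", sender, kwargs.get("account"))

tenant_was_created.send("tenant-123", account="alice")
# ---------------------------------------------------------------------------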
- if app.config.get('MAIL_DEFAULT_SEND_FROM'): - self._default_send_from = app.config.get('MAIL_DEFAULT_SEND_FROM') - - if app.config.get('MAIL_TYPE') == 'resend': - api_key = app.config.get('RESEND_API_KEY') + if app.config.get("MAIL_TYPE"): + if app.config.get("MAIL_DEFAULT_SEND_FROM"): + self._default_send_from = app.config.get("MAIL_DEFAULT_SEND_FROM") + + if app.config.get("MAIL_TYPE") == "resend": + api_key = app.config.get("RESEND_API_KEY") if not api_key: - raise ValueError('RESEND_API_KEY is not set') + raise ValueError("RESEND_API_KEY is not set") - api_url = app.config.get('RESEND_API_URL') + api_url = app.config.get("RESEND_API_URL") if api_url: resend.api_url = api_url resend.api_key = api_key self._client = resend.Emails - elif app.config.get('MAIL_TYPE') == 'smtp': + elif app.config.get("MAIL_TYPE") == "smtp": from libs.smtp import SMTPClient - if not app.config.get('SMTP_SERVER') or not app.config.get('SMTP_PORT'): - raise ValueError('SMTP_SERVER and SMTP_PORT are required for smtp mail type') - if not app.config.get('SMTP_USE_TLS') and app.config.get('SMTP_OPPORTUNISTIC_TLS'): - raise ValueError('SMTP_OPPORTUNISTIC_TLS is not supported without enabling SMTP_USE_TLS') + + if not app.config.get("SMTP_SERVER") or not app.config.get("SMTP_PORT"): + raise ValueError("SMTP_SERVER and SMTP_PORT are required for smtp mail type") + if not app.config.get("SMTP_USE_TLS") and app.config.get("SMTP_OPPORTUNISTIC_TLS"): + raise ValueError("SMTP_OPPORTUNISTIC_TLS is not supported without enabling SMTP_USE_TLS") self._client = SMTPClient( - server=app.config.get('SMTP_SERVER'), - port=app.config.get('SMTP_PORT'), - username=app.config.get('SMTP_USERNAME'), - password=app.config.get('SMTP_PASSWORD'), - _from=app.config.get('MAIL_DEFAULT_SEND_FROM'), - use_tls=app.config.get('SMTP_USE_TLS'), - opportunistic_tls=app.config.get('SMTP_OPPORTUNISTIC_TLS') + server=app.config.get("SMTP_SERVER"), + port=app.config.get("SMTP_PORT"), + username=app.config.get("SMTP_USERNAME"), + password=app.config.get("SMTP_PASSWORD"), + _from=app.config.get("MAIL_DEFAULT_SEND_FROM"), + use_tls=app.config.get("SMTP_USE_TLS"), + opportunistic_tls=app.config.get("SMTP_OPPORTUNISTIC_TLS"), ) else: - raise ValueError('Unsupported mail type {}'.format(app.config.get('MAIL_TYPE'))) + raise ValueError("Unsupported mail type {}".format(app.config.get("MAIL_TYPE"))) else: - logging.warning('MAIL_TYPE is not set') - + logging.warning("MAIL_TYPE is not set") def send(self, to: str, subject: str, html: str, from_: Optional[str] = None): if not self._client: - raise ValueError('Mail client is not initialized') + raise ValueError("Mail client is not initialized") if not from_ and self._default_send_from: from_ = self._default_send_from if not from_: - raise ValueError('mail from is not set') + raise ValueError("mail from is not set") if not to: - raise ValueError('mail to is not set') + raise ValueError("mail to is not set") if not subject: - raise ValueError('mail subject is not set') + raise ValueError("mail subject is not set") if not html: - raise ValueError('mail html is not set') - - self._client.send({ - "from": from_, - "to": to, - "subject": subject, - "html": html - }) + raise ValueError("mail html is not set") + + self._client.send( + { + "from": from_, + "to": to, + "subject": subject, + "html": html, + } + ) def init_app(app: Flask): diff --git a/api/extensions/ext_redis.py b/api/extensions/ext_redis.py index 23d7768d4d0f5..d5fb162fd8f2f 100644 --- a/api/extensions/ext_redis.py +++ b/api/extensions/ext_redis.py @@ 
-6,18 +6,21 @@ def init_app(app): connection_class = Connection - if app.config.get('REDIS_USE_SSL'): + if app.config.get("REDIS_USE_SSL"): connection_class = SSLConnection - redis_client.connection_pool = redis.ConnectionPool(**{ - 'host': app.config.get('REDIS_HOST'), - 'port': app.config.get('REDIS_PORT'), - 'username': app.config.get('REDIS_USERNAME'), - 'password': app.config.get('REDIS_PASSWORD'), - 'db': app.config.get('REDIS_DB'), - 'encoding': 'utf-8', - 'encoding_errors': 'strict', - 'decode_responses': False - }, connection_class=connection_class) + redis_client.connection_pool = redis.ConnectionPool( + **{ + "host": app.config.get("REDIS_HOST"), + "port": app.config.get("REDIS_PORT"), + "username": app.config.get("REDIS_USERNAME"), + "password": app.config.get("REDIS_PASSWORD"), + "db": app.config.get("REDIS_DB"), + "encoding": "utf-8", + "encoding_errors": "strict", + "decode_responses": False, + }, + connection_class=connection_class, + ) - app.extensions['redis'] = redis_client + app.extensions["redis"] = redis_client diff --git a/api/extensions/ext_sentry.py b/api/extensions/ext_sentry.py index f05c10bc08926..227c6635f0eb1 100644 --- a/api/extensions/ext_sentry.py +++ b/api/extensions/ext_sentry.py @@ -5,16 +5,13 @@ def init_app(app): - if app.config.get('SENTRY_DSN'): + if app.config.get("SENTRY_DSN"): sentry_sdk.init( - dsn=app.config.get('SENTRY_DSN'), - integrations=[ - FlaskIntegration(), - CeleryIntegration() - ], + dsn=app.config.get("SENTRY_DSN"), + integrations=[FlaskIntegration(), CeleryIntegration()], ignore_errors=[HTTPException, ValueError], - traces_sample_rate=app.config.get('SENTRY_TRACES_SAMPLE_RATE', 1.0), - profiles_sample_rate=app.config.get('SENTRY_PROFILES_SAMPLE_RATE', 1.0), - environment=app.config.get('DEPLOY_ENV'), - release=f"dify-{app.config.get('CURRENT_VERSION')}-{app.config.get('COMMIT_SHA')}" + traces_sample_rate=app.config.get("SENTRY_TRACES_SAMPLE_RATE", 1.0), + profiles_sample_rate=app.config.get("SENTRY_PROFILES_SAMPLE_RATE", 1.0), + environment=app.config.get("DEPLOY_ENV"), + release=f"dify-{app.config.get('CURRENT_VERSION')}-{app.config.get('COMMIT_SHA')}", ) diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py index 38db1c6ce103a..e6c4352577fc3 100644 --- a/api/extensions/ext_storage.py +++ b/api/extensions/ext_storage.py @@ -17,31 +17,19 @@ def __init__(self): self.storage_runner = None def init_app(self, app: Flask): - storage_type = app.config.get('STORAGE_TYPE') - if storage_type == 's3': - self.storage_runner = S3Storage( - app=app - ) - elif storage_type == 'azure-blob': - self.storage_runner = AzureStorage( - app=app - ) - elif storage_type == 'aliyun-oss': - self.storage_runner = AliyunStorage( - app=app - ) - elif storage_type == 'google-storage': - self.storage_runner = GoogleStorage( - app=app - ) - elif storage_type == 'tencent-cos': - self.storage_runner = TencentStorage( - app=app - ) - elif storage_type == 'oci-storage': - self.storage_runner = OCIStorage( - app=app - ) + storage_type = app.config.get("STORAGE_TYPE") + if storage_type == "s3": + self.storage_runner = S3Storage(app=app) + elif storage_type == "azure-blob": + self.storage_runner = AzureStorage(app=app) + elif storage_type == "aliyun-oss": + self.storage_runner = AliyunStorage(app=app) + elif storage_type == "google-storage": + self.storage_runner = GoogleStorage(app=app) + elif storage_type == "tencent-cos": + self.storage_runner = TencentStorage(app=app) + elif storage_type == "oci-storage": + self.storage_runner = 
OCIStorage(app=app) else: self.storage_runner = LocalStorage(app=app) diff --git a/api/extensions/storage/aliyun_storage.py b/api/extensions/storage/aliyun_storage.py index b81a8691f1545..b962cedc55178 100644 --- a/api/extensions/storage/aliyun_storage.py +++ b/api/extensions/storage/aliyun_storage.py @@ -8,23 +8,22 @@ class AliyunStorage(BaseStorage): - """Implementation for aliyun storage. - """ + """Implementation for aliyun storage.""" def __init__(self, app: Flask): super().__init__(app) app_config = self.app.config - self.bucket_name = app_config.get('ALIYUN_OSS_BUCKET_NAME') + self.bucket_name = app_config.get("ALIYUN_OSS_BUCKET_NAME") oss_auth_method = aliyun_s3.Auth region = None - if app_config.get('ALIYUN_OSS_AUTH_VERSION') == 'v4': + if app_config.get("ALIYUN_OSS_AUTH_VERSION") == "v4": oss_auth_method = aliyun_s3.AuthV4 - region = app_config.get('ALIYUN_OSS_REGION') - oss_auth = oss_auth_method(app_config.get('ALIYUN_OSS_ACCESS_KEY'), app_config.get('ALIYUN_OSS_SECRET_KEY')) + region = app_config.get("ALIYUN_OSS_REGION") + oss_auth = oss_auth_method(app_config.get("ALIYUN_OSS_ACCESS_KEY"), app_config.get("ALIYUN_OSS_SECRET_KEY")) self.client = aliyun_s3.Bucket( oss_auth, - app_config.get('ALIYUN_OSS_ENDPOINT'), + app_config.get("ALIYUN_OSS_ENDPOINT"), self.bucket_name, connect_timeout=30, region=region, diff --git a/api/extensions/storage/azure_storage.py b/api/extensions/storage/azure_storage.py index af3e7ef84911f..ca8cbb9188b5c 100644 --- a/api/extensions/storage/azure_storage.py +++ b/api/extensions/storage/azure_storage.py @@ -9,16 +9,15 @@ class AzureStorage(BaseStorage): - """Implementation for azure storage. - """ + """Implementation for azure storage.""" def __init__(self, app: Flask): super().__init__(app) app_config = self.app.config - self.bucket_name = app_config.get('AZURE_BLOB_CONTAINER_NAME') - self.account_url = app_config.get('AZURE_BLOB_ACCOUNT_URL') - self.account_name = app_config.get('AZURE_BLOB_ACCOUNT_NAME') - self.account_key = app_config.get('AZURE_BLOB_ACCOUNT_KEY') + self.bucket_name = app_config.get("AZURE_BLOB_CONTAINER_NAME") + self.account_url = app_config.get("AZURE_BLOB_ACCOUNT_URL") + self.account_name = app_config.get("AZURE_BLOB_ACCOUNT_NAME") + self.account_key = app_config.get("AZURE_BLOB_ACCOUNT_KEY") def save(self, filename, data): client = self._sync_client() @@ -39,6 +38,7 @@ def generate(filename: str = filename) -> Generator: blob = client.get_blob_client(container=self.bucket_name, blob=filename) blob_data = blob.download_blob() yield from blob_data.chunks() + return generate(filename) def download(self, filename, target_filepath): @@ -62,17 +62,17 @@ def delete(self, filename): blob_container.delete_blob(filename) def _sync_client(self): - cache_key = 'azure_blob_sas_token_{}_{}'.format(self.account_name, self.account_key) + cache_key = "azure_blob_sas_token_{}_{}".format(self.account_name, self.account_key) cache_result = redis_client.get(cache_key) if cache_result is not None: - sas_token = cache_result.decode('utf-8') + sas_token = cache_result.decode("utf-8") else: sas_token = generate_account_sas( account_name=self.account_name, account_key=self.account_key, resource_types=ResourceTypes(service=True, container=True, object=True), permission=AccountSasPermissions(read=True, write=True, delete=True, list=True, add=True, create=True), - expiry=datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(hours=1) + expiry=datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(hours=1), ) redis_client.set(cache_key, sas_token, 
ex=3000) return BlobServiceClient(account_url=self.account_url, credential=sas_token) diff --git a/api/extensions/storage/base_storage.py b/api/extensions/storage/base_storage.py index 13d9c3429044c..c3fe9ec82a5b4 100644 --- a/api/extensions/storage/base_storage.py +++ b/api/extensions/storage/base_storage.py @@ -1,4 +1,5 @@ """Abstract interface for file storage implementations.""" + from abc import ABC, abstractmethod from collections.abc import Generator @@ -6,8 +7,8 @@ class BaseStorage(ABC): - """Interface for file storage. - """ + """Interface for file storage.""" + app = None def __init__(self, app: Flask): diff --git a/api/extensions/storage/google_storage.py b/api/extensions/storage/google_storage.py index ef6cd69039787..9ed1fcf0b4e11 100644 --- a/api/extensions/storage/google_storage.py +++ b/api/extensions/storage/google_storage.py @@ -11,16 +11,16 @@ class GoogleStorage(BaseStorage): - """Implementation for google storage. - """ + """Implementation for google storage.""" + def __init__(self, app: Flask): super().__init__(app) app_config = self.app.config - self.bucket_name = app_config.get('GOOGLE_STORAGE_BUCKET_NAME') - service_account_json_str = app_config.get('GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64') + self.bucket_name = app_config.get("GOOGLE_STORAGE_BUCKET_NAME") + service_account_json_str = app_config.get("GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64") # if service_account_json_str is empty, use Application Default Credentials if service_account_json_str: - service_account_json = base64.b64decode(service_account_json_str).decode('utf-8') + service_account_json = base64.b64decode(service_account_json_str).decode("utf-8") # convert str to object service_account_obj = json.loads(service_account_json) self.client = GoogleCloudStorage.Client.from_service_account_info(service_account_obj) @@ -43,9 +43,10 @@ def load_stream(self, filename: str) -> Generator: def generate(filename: str = filename) -> Generator: bucket = self.client.get_bucket(self.bucket_name) blob = bucket.get_blob(filename) - with closing(blob.open(mode='rb')) as blob_stream: + with closing(blob.open(mode="rb")) as blob_stream: while chunk := blob_stream.read(4096): yield chunk + return generate() def download(self, filename, target_filepath): @@ -60,4 +61,4 @@ def exists(self, filename): def delete(self, filename): bucket = self.client.get_bucket(self.bucket_name) - bucket.delete_blob(filename) \ No newline at end of file + bucket.delete_blob(filename) diff --git a/api/extensions/storage/local_storage.py b/api/extensions/storage/local_storage.py index 389ef12f82bcd..46ee4bf80f8e6 100644 --- a/api/extensions/storage/local_storage.py +++ b/api/extensions/storage/local_storage.py @@ -8,21 +8,20 @@ class LocalStorage(BaseStorage): - """Implementation for local storage. 
- """ + """Implementation for local storage.""" def __init__(self, app: Flask): super().__init__(app) - folder = self.app.config.get('STORAGE_LOCAL_PATH') + folder = self.app.config.get("STORAGE_LOCAL_PATH") if not os.path.isabs(folder): folder = os.path.join(app.root_path, folder) self.folder = folder def save(self, filename, data): - if not self.folder or self.folder.endswith('/'): + if not self.folder or self.folder.endswith("/"): filename = self.folder + filename else: - filename = self.folder + '/' + filename + filename = self.folder + "/" + filename folder = os.path.dirname(filename) os.makedirs(folder, exist_ok=True) @@ -31,10 +30,10 @@ def save(self, filename, data): f.write(data) def load_once(self, filename: str) -> bytes: - if not self.folder or self.folder.endswith('/'): + if not self.folder or self.folder.endswith("/"): filename = self.folder + filename else: - filename = self.folder + '/' + filename + filename = self.folder + "/" + filename if not os.path.exists(filename): raise FileNotFoundError("File not found") @@ -46,10 +45,10 @@ def load_once(self, filename: str) -> bytes: def load_stream(self, filename: str) -> Generator: def generate(filename: str = filename) -> Generator: - if not self.folder or self.folder.endswith('/'): + if not self.folder or self.folder.endswith("/"): filename = self.folder + filename else: - filename = self.folder + '/' + filename + filename = self.folder + "/" + filename if not os.path.exists(filename): raise FileNotFoundError("File not found") @@ -61,10 +60,10 @@ def generate(filename: str = filename) -> Generator: return generate() def download(self, filename, target_filepath): - if not self.folder or self.folder.endswith('/'): + if not self.folder or self.folder.endswith("/"): filename = self.folder + filename else: - filename = self.folder + '/' + filename + filename = self.folder + "/" + filename if not os.path.exists(filename): raise FileNotFoundError("File not found") @@ -72,17 +71,17 @@ def download(self, filename, target_filepath): shutil.copyfile(filename, target_filepath) def exists(self, filename): - if not self.folder or self.folder.endswith('/'): + if not self.folder or self.folder.endswith("/"): filename = self.folder + filename else: - filename = self.folder + '/' + filename + filename = self.folder + "/" + filename return os.path.exists(filename) def delete(self, filename): - if not self.folder or self.folder.endswith('/'): + if not self.folder or self.folder.endswith("/"): filename = self.folder + filename else: - filename = self.folder + '/' + filename + filename = self.folder + "/" + filename if os.path.exists(filename): os.remove(filename) diff --git a/api/extensions/storage/oci_storage.py b/api/extensions/storage/oci_storage.py index e78d870950339..e32fa0a0ae78a 100644 --- a/api/extensions/storage/oci_storage.py +++ b/api/extensions/storage/oci_storage.py @@ -12,14 +12,14 @@ class OCIStorage(BaseStorage): def __init__(self, app: Flask): super().__init__(app) app_config = self.app.config - self.bucket_name = app_config.get('OCI_BUCKET_NAME') + self.bucket_name = app_config.get("OCI_BUCKET_NAME") self.client = boto3.client( - 's3', - aws_secret_access_key=app_config.get('OCI_SECRET_KEY'), - aws_access_key_id=app_config.get('OCI_ACCESS_KEY'), - endpoint_url=app_config.get('OCI_ENDPOINT'), - region_name=app_config.get('OCI_REGION') - ) + "s3", + aws_secret_access_key=app_config.get("OCI_SECRET_KEY"), + aws_access_key_id=app_config.get("OCI_ACCESS_KEY"), + endpoint_url=app_config.get("OCI_ENDPOINT"), + 
region_name=app_config.get("OCI_REGION"), + ) def save(self, filename, data): self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data) @@ -27,9 +27,9 @@ def save(self, filename, data): def load_once(self, filename: str) -> bytes: try: with closing(self.client) as client: - data = client.get_object(Bucket=self.bucket_name, Key=filename)['Body'].read() + data = client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read() except ClientError as ex: - if ex.response['Error']['Code'] == 'NoSuchKey': + if ex.response["Error"]["Code"] == "NoSuchKey": raise FileNotFoundError("File not found") else: raise @@ -40,12 +40,13 @@ def generate(filename: str = filename) -> Generator: try: with closing(self.client) as client: response = client.get_object(Bucket=self.bucket_name, Key=filename) - yield from response['Body'].iter_chunks() + yield from response["Body"].iter_chunks() except ClientError as ex: - if ex.response['Error']['Code'] == 'NoSuchKey': + if ex.response["Error"]["Code"] == "NoSuchKey": raise FileNotFoundError("File not found") else: raise + return generate() def download(self, filename, target_filepath): @@ -61,4 +62,4 @@ def exists(self, filename): return False def delete(self, filename): - self.client.delete_object(Bucket=self.bucket_name, Key=filename) \ No newline at end of file + self.client.delete_object(Bucket=self.bucket_name, Key=filename) diff --git a/api/extensions/storage/s3_storage.py b/api/extensions/storage/s3_storage.py index 787596fa791d4..022ce5b14a7b8 100644 --- a/api/extensions/storage/s3_storage.py +++ b/api/extensions/storage/s3_storage.py @@ -10,24 +10,24 @@ class S3Storage(BaseStorage): - """Implementation for s3 storage. - """ + """Implementation for s3 storage.""" + def __init__(self, app: Flask): super().__init__(app) app_config = self.app.config - self.bucket_name = app_config.get('S3_BUCKET_NAME') - if app_config.get('S3_USE_AWS_MANAGED_IAM'): + self.bucket_name = app_config.get("S3_BUCKET_NAME") + if app_config.get("S3_USE_AWS_MANAGED_IAM"): session = boto3.Session() - self.client = session.client('s3') + self.client = session.client("s3") else: self.client = boto3.client( - 's3', - aws_secret_access_key=app_config.get('S3_SECRET_KEY'), - aws_access_key_id=app_config.get('S3_ACCESS_KEY'), - endpoint_url=app_config.get('S3_ENDPOINT'), - region_name=app_config.get('S3_REGION'), - config=Config(s3={'addressing_style': app_config.get('S3_ADDRESS_STYLE')}) - ) + "s3", + aws_secret_access_key=app_config.get("S3_SECRET_KEY"), + aws_access_key_id=app_config.get("S3_ACCESS_KEY"), + endpoint_url=app_config.get("S3_ENDPOINT"), + region_name=app_config.get("S3_REGION"), + config=Config(s3={"addressing_style": app_config.get("S3_ADDRESS_STYLE")}), + ) def save(self, filename, data): self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data) @@ -35,9 +35,9 @@ def save(self, filename, data): def load_once(self, filename: str) -> bytes: try: with closing(self.client) as client: - data = client.get_object(Bucket=self.bucket_name, Key=filename)['Body'].read() + data = client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read() except ClientError as ex: - if ex.response['Error']['Code'] == 'NoSuchKey': + if ex.response["Error"]["Code"] == "NoSuchKey": raise FileNotFoundError("File not found") else: raise @@ -48,12 +48,13 @@ def generate(filename: str = filename) -> Generator: try: with closing(self.client) as client: response = client.get_object(Bucket=self.bucket_name, Key=filename) - yield from response['Body'].iter_chunks() 
+ yield from response["Body"].iter_chunks() except ClientError as ex: - if ex.response['Error']['Code'] == 'NoSuchKey': + if ex.response["Error"]["Code"] == "NoSuchKey": raise FileNotFoundError("File not found") else: raise + return generate() def download(self, filename, target_filepath): diff --git a/api/extensions/storage/tencent_storage.py b/api/extensions/storage/tencent_storage.py index e2c1ca55e3c43..1d499cd3bcea1 100644 --- a/api/extensions/storage/tencent_storage.py +++ b/api/extensions/storage/tencent_storage.py @@ -7,18 +7,17 @@ class TencentStorage(BaseStorage): - """Implementation for tencent cos storage. - """ + """Implementation for tencent cos storage.""" def __init__(self, app: Flask): super().__init__(app) app_config = self.app.config - self.bucket_name = app_config.get('TENCENT_COS_BUCKET_NAME') + self.bucket_name = app_config.get("TENCENT_COS_BUCKET_NAME") config = CosConfig( - Region=app_config.get('TENCENT_COS_REGION'), - SecretId=app_config.get('TENCENT_COS_SECRET_ID'), - SecretKey=app_config.get('TENCENT_COS_SECRET_KEY'), - Scheme=app_config.get('TENCENT_COS_SCHEME'), + Region=app_config.get("TENCENT_COS_REGION"), + SecretId=app_config.get("TENCENT_COS_SECRET_ID"), + SecretKey=app_config.get("TENCENT_COS_SECRET_KEY"), + Scheme=app_config.get("TENCENT_COS_SCHEME"), ) self.client = CosS3Client(config) @@ -26,19 +25,19 @@ def save(self, filename, data): self.client.put_object(Bucket=self.bucket_name, Body=data, Key=filename) def load_once(self, filename: str) -> bytes: - data = self.client.get_object(Bucket=self.bucket_name, Key=filename)['Body'].get_raw_stream().read() + data = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].get_raw_stream().read() return data def load_stream(self, filename: str) -> Generator: def generate(filename: str = filename) -> Generator: response = self.client.get_object(Bucket=self.bucket_name, Key=filename) - yield from response['Body'].get_stream(chunk_size=4096) + yield from response["Body"].get_stream(chunk_size=4096) return generate() def download(self, filename, target_filepath): response = self.client.get_object(Bucket=self.bucket_name, Key=filename) - response['Body'].get_stream_to_file(target_filepath) + response["Body"].get_stream_to_file(target_filepath) def exists(self, filename): return self.client.object_exists(Bucket=self.bucket_name, Key=filename) diff --git a/api/fields/annotation_fields.py b/api/fields/annotation_fields.py index c77808447519f..379dcc6d16fe5 100644 --- a/api/fields/annotation_fields.py +++ b/api/fields/annotation_fields.py @@ -5,7 +5,7 @@ annotation_fields = { "id": fields.String, "question": fields.String, - "answer": fields.Raw(attribute='content'), + "answer": fields.Raw(attribute="content"), "hit_count": fields.Integer, "created_at": TimestampField, # 'account': fields.Nested(simple_account_fields, allow_null=True) @@ -21,8 +21,8 @@ "score": fields.Float, "question": fields.String, "created_at": TimestampField, - "match": fields.String(attribute='annotation_question'), - "response": fields.String(attribute='annotation_content') + "match": fields.String(attribute="annotation_question"), + "response": fields.String(attribute="annotation_content"), } annotation_hit_history_list_fields = { diff --git a/api/fields/api_based_extension_fields.py b/api/fields/api_based_extension_fields.py index 749e9900de181..a85d4a34dbe7b 100644 --- a/api/fields/api_based_extension_fields.py +++ b/api/fields/api_based_extension_fields.py @@ -8,16 +8,16 @@ def output(self, key, obj): api_key = obj.api_key # 
If the length of the api_key is less than 8 characters, show the first and last characters if len(api_key) <= 8: - return api_key[0] + '******' + api_key[-1] + return api_key[0] + "******" + api_key[-1] # If the api_key is greater than 8 characters, show the first three and the last three characters else: - return api_key[:3] + '******' + api_key[-3:] + return api_key[:3] + "******" + api_key[-3:] api_based_extension_fields = { - 'id': fields.String, - 'name': fields.String, - 'api_endpoint': fields.String, - 'api_key': HiddenAPIKey, - 'created_at': TimestampField + "id": fields.String, + "name": fields.String, + "api_endpoint": fields.String, + "api_key": HiddenAPIKey, + "created_at": TimestampField, } diff --git a/api/fields/app_fields.py b/api/fields/app_fields.py index 94d804a919869..7036d58e4ab3a 100644 --- a/api/fields/app_fields.py +++ b/api/fields/app_fields.py @@ -3,157 +3,153 @@ from libs.helper import TimestampField app_detail_kernel_fields = { - 'id': fields.String, - 'name': fields.String, - 'description': fields.String, - 'mode': fields.String(attribute='mode_compatible_with_agent'), - 'icon': fields.String, - 'icon_background': fields.String, + "id": fields.String, + "name": fields.String, + "description": fields.String, + "mode": fields.String(attribute="mode_compatible_with_agent"), + "icon": fields.String, + "icon_background": fields.String, } related_app_list = { - 'data': fields.List(fields.Nested(app_detail_kernel_fields)), - 'total': fields.Integer, + "data": fields.List(fields.Nested(app_detail_kernel_fields)), + "total": fields.Integer, } model_config_fields = { - 'opening_statement': fields.String, - 'suggested_questions': fields.Raw(attribute='suggested_questions_list'), - 'suggested_questions_after_answer': fields.Raw(attribute='suggested_questions_after_answer_dict'), - 'speech_to_text': fields.Raw(attribute='speech_to_text_dict'), - 'text_to_speech': fields.Raw(attribute='text_to_speech_dict'), - 'retriever_resource': fields.Raw(attribute='retriever_resource_dict'), - 'annotation_reply': fields.Raw(attribute='annotation_reply_dict'), - 'more_like_this': fields.Raw(attribute='more_like_this_dict'), - 'sensitive_word_avoidance': fields.Raw(attribute='sensitive_word_avoidance_dict'), - 'external_data_tools': fields.Raw(attribute='external_data_tools_list'), - 'model': fields.Raw(attribute='model_dict'), - 'user_input_form': fields.Raw(attribute='user_input_form_list'), - 'dataset_query_variable': fields.String, - 'pre_prompt': fields.String, - 'agent_mode': fields.Raw(attribute='agent_mode_dict'), - 'prompt_type': fields.String, - 'chat_prompt_config': fields.Raw(attribute='chat_prompt_config_dict'), - 'completion_prompt_config': fields.Raw(attribute='completion_prompt_config_dict'), - 'dataset_configs': fields.Raw(attribute='dataset_configs_dict'), - 'file_upload': fields.Raw(attribute='file_upload_dict'), - 'created_at': TimestampField + "opening_statement": fields.String, + "suggested_questions": fields.Raw(attribute="suggested_questions_list"), + "suggested_questions_after_answer": fields.Raw(attribute="suggested_questions_after_answer_dict"), + "speech_to_text": fields.Raw(attribute="speech_to_text_dict"), + "text_to_speech": fields.Raw(attribute="text_to_speech_dict"), + "retriever_resource": fields.Raw(attribute="retriever_resource_dict"), + "annotation_reply": fields.Raw(attribute="annotation_reply_dict"), + "more_like_this": fields.Raw(attribute="more_like_this_dict"), + "sensitive_word_avoidance": fields.Raw(attribute="sensitive_word_avoidance_dict"), + 
"external_data_tools": fields.Raw(attribute="external_data_tools_list"), + "model": fields.Raw(attribute="model_dict"), + "user_input_form": fields.Raw(attribute="user_input_form_list"), + "dataset_query_variable": fields.String, + "pre_prompt": fields.String, + "agent_mode": fields.Raw(attribute="agent_mode_dict"), + "prompt_type": fields.String, + "chat_prompt_config": fields.Raw(attribute="chat_prompt_config_dict"), + "completion_prompt_config": fields.Raw(attribute="completion_prompt_config_dict"), + "dataset_configs": fields.Raw(attribute="dataset_configs_dict"), + "file_upload": fields.Raw(attribute="file_upload_dict"), + "created_at": TimestampField, } app_detail_fields = { - 'id': fields.String, - 'name': fields.String, - 'description': fields.String, - 'mode': fields.String(attribute='mode_compatible_with_agent'), - 'icon': fields.String, - 'icon_background': fields.String, - 'enable_site': fields.Boolean, - 'enable_api': fields.Boolean, - 'model_config': fields.Nested(model_config_fields, attribute='app_model_config', allow_null=True), - 'tracing': fields.Raw, - 'created_at': TimestampField + "id": fields.String, + "name": fields.String, + "description": fields.String, + "mode": fields.String(attribute="mode_compatible_with_agent"), + "icon": fields.String, + "icon_background": fields.String, + "enable_site": fields.Boolean, + "enable_api": fields.Boolean, + "model_config": fields.Nested(model_config_fields, attribute="app_model_config", allow_null=True), + "tracing": fields.Raw, + "created_at": TimestampField, } prompt_config_fields = { - 'prompt_template': fields.String, + "prompt_template": fields.String, } model_config_partial_fields = { - 'model': fields.Raw(attribute='model_dict'), - 'pre_prompt': fields.String, + "model": fields.Raw(attribute="model_dict"), + "pre_prompt": fields.String, } -tag_fields = { - 'id': fields.String, - 'name': fields.String, - 'type': fields.String -} +tag_fields = {"id": fields.String, "name": fields.String, "type": fields.String} app_partial_fields = { - 'id': fields.String, - 'name': fields.String, - 'max_active_requests': fields.Raw(), - 'description': fields.String(attribute='desc_or_prompt'), - 'mode': fields.String(attribute='mode_compatible_with_agent'), - 'icon': fields.String, - 'icon_background': fields.String, - 'model_config': fields.Nested(model_config_partial_fields, attribute='app_model_config', allow_null=True), - 'created_at': TimestampField, - 'tags': fields.List(fields.Nested(tag_fields)) + "id": fields.String, + "name": fields.String, + "max_active_requests": fields.Raw(), + "description": fields.String(attribute="desc_or_prompt"), + "mode": fields.String(attribute="mode_compatible_with_agent"), + "icon": fields.String, + "icon_background": fields.String, + "model_config": fields.Nested(model_config_partial_fields, attribute="app_model_config", allow_null=True), + "created_at": TimestampField, + "tags": fields.List(fields.Nested(tag_fields)), } app_pagination_fields = { - 'page': fields.Integer, - 'limit': fields.Integer(attribute='per_page'), - 'total': fields.Integer, - 'has_more': fields.Boolean(attribute='has_next'), - 'data': fields.List(fields.Nested(app_partial_fields), attribute='items') + "page": fields.Integer, + "limit": fields.Integer(attribute="per_page"), + "total": fields.Integer, + "has_more": fields.Boolean(attribute="has_next"), + "data": fields.List(fields.Nested(app_partial_fields), attribute="items"), } template_fields = { - 'name': fields.String, - 'icon': fields.String, - 'icon_background': 
fields.String, - 'description': fields.String, - 'mode': fields.String, - 'model_config': fields.Nested(model_config_fields), + "name": fields.String, + "icon": fields.String, + "icon_background": fields.String, + "description": fields.String, + "mode": fields.String, + "model_config": fields.Nested(model_config_fields), } template_list_fields = { - 'data': fields.List(fields.Nested(template_fields)), + "data": fields.List(fields.Nested(template_fields)), } site_fields = { - 'access_token': fields.String(attribute='code'), - 'code': fields.String, - 'title': fields.String, - 'icon': fields.String, - 'icon_background': fields.String, - 'description': fields.String, - 'default_language': fields.String, - 'chat_color_theme': fields.String, - 'chat_color_theme_inverted': fields.Boolean, - 'customize_domain': fields.String, - 'copyright': fields.String, - 'privacy_policy': fields.String, - 'custom_disclaimer': fields.String, - 'customize_token_strategy': fields.String, - 'prompt_public': fields.Boolean, - 'app_base_url': fields.String, - 'show_workflow_steps': fields.Boolean, + "access_token": fields.String(attribute="code"), + "code": fields.String, + "title": fields.String, + "icon": fields.String, + "icon_background": fields.String, + "description": fields.String, + "default_language": fields.String, + "chat_color_theme": fields.String, + "chat_color_theme_inverted": fields.Boolean, + "customize_domain": fields.String, + "copyright": fields.String, + "privacy_policy": fields.String, + "custom_disclaimer": fields.String, + "customize_token_strategy": fields.String, + "prompt_public": fields.Boolean, + "app_base_url": fields.String, + "show_workflow_steps": fields.Boolean, } app_detail_fields_with_site = { - 'id': fields.String, - 'name': fields.String, - 'description': fields.String, - 'mode': fields.String(attribute='mode_compatible_with_agent'), - 'icon': fields.String, - 'icon_background': fields.String, - 'enable_site': fields.Boolean, - 'enable_api': fields.Boolean, - 'model_config': fields.Nested(model_config_fields, attribute='app_model_config', allow_null=True), - 'site': fields.Nested(site_fields), - 'api_base_url': fields.String, - 'created_at': TimestampField, - 'deleted_tools': fields.List(fields.String), + "id": fields.String, + "name": fields.String, + "description": fields.String, + "mode": fields.String(attribute="mode_compatible_with_agent"), + "icon": fields.String, + "icon_background": fields.String, + "enable_site": fields.Boolean, + "enable_api": fields.Boolean, + "model_config": fields.Nested(model_config_fields, attribute="app_model_config", allow_null=True), + "site": fields.Nested(site_fields), + "api_base_url": fields.String, + "created_at": TimestampField, + "deleted_tools": fields.List(fields.String), } app_site_fields = { - 'app_id': fields.String, - 'access_token': fields.String(attribute='code'), - 'code': fields.String, - 'title': fields.String, - 'icon': fields.String, - 'icon_background': fields.String, - 'description': fields.String, - 'default_language': fields.String, - 'customize_domain': fields.String, - 'copyright': fields.String, - 'privacy_policy': fields.String, - 'custom_disclaimer': fields.String, - 'customize_token_strategy': fields.String, - 'prompt_public': fields.Boolean, - 'show_workflow_steps': fields.Boolean, + "app_id": fields.String, + "access_token": fields.String(attribute="code"), + "code": fields.String, + "title": fields.String, + "icon": fields.String, + "icon_background": fields.String, + "description": fields.String, + 
"default_language": fields.String, + "customize_domain": fields.String, + "copyright": fields.String, + "privacy_policy": fields.String, + "custom_disclaimer": fields.String, + "customize_token_strategy": fields.String, + "prompt_public": fields.Boolean, + "show_workflow_steps": fields.Boolean, } diff --git a/api/fields/conversation_fields.py b/api/fields/conversation_fields.py index 79ceb02685279..1b15fe38800b3 100644 --- a/api/fields/conversation_fields.py +++ b/api/fields/conversation_fields.py @@ -6,205 +6,202 @@ class MessageTextField(fields.Raw): def format(self, value): - return value[0]['text'] if value else '' + return value[0]["text"] if value else "" feedback_fields = { - 'rating': fields.String, - 'content': fields.String, - 'from_source': fields.String, - 'from_end_user_id': fields.String, - 'from_account': fields.Nested(simple_account_fields, allow_null=True), + "rating": fields.String, + "content": fields.String, + "from_source": fields.String, + "from_end_user_id": fields.String, + "from_account": fields.Nested(simple_account_fields, allow_null=True), } annotation_fields = { - 'id': fields.String, - 'question': fields.String, - 'content': fields.String, - 'account': fields.Nested(simple_account_fields, allow_null=True), - 'created_at': TimestampField + "id": fields.String, + "question": fields.String, + "content": fields.String, + "account": fields.Nested(simple_account_fields, allow_null=True), + "created_at": TimestampField, } annotation_hit_history_fields = { - 'annotation_id': fields.String(attribute='id'), - 'annotation_create_account': fields.Nested(simple_account_fields, allow_null=True), - 'created_at': TimestampField + "annotation_id": fields.String(attribute="id"), + "annotation_create_account": fields.Nested(simple_account_fields, allow_null=True), + "created_at": TimestampField, } message_file_fields = { - 'id': fields.String, - 'type': fields.String, - 'url': fields.String, - 'belongs_to': fields.String(default='user'), + "id": fields.String, + "type": fields.String, + "url": fields.String, + "belongs_to": fields.String(default="user"), } agent_thought_fields = { - 'id': fields.String, - 'chain_id': fields.String, - 'message_id': fields.String, - 'position': fields.Integer, - 'thought': fields.String, - 'tool': fields.String, - 'tool_labels': fields.Raw, - 'tool_input': fields.String, - 'created_at': TimestampField, - 'observation': fields.String, - 'files': fields.List(fields.String), + "id": fields.String, + "chain_id": fields.String, + "message_id": fields.String, + "position": fields.Integer, + "thought": fields.String, + "tool": fields.String, + "tool_labels": fields.Raw, + "tool_input": fields.String, + "created_at": TimestampField, + "observation": fields.String, + "files": fields.List(fields.String), } message_detail_fields = { - 'id': fields.String, - 'conversation_id': fields.String, - 'inputs': fields.Raw, - 'query': fields.String, - 'message': fields.Raw, - 'message_tokens': fields.Integer, - 'answer': fields.String(attribute='re_sign_file_url_answer'), - 'answer_tokens': fields.Integer, - 'provider_response_latency': fields.Float, - 'from_source': fields.String, - 'from_end_user_id': fields.String, - 'from_account_id': fields.String, - 'feedbacks': fields.List(fields.Nested(feedback_fields)), - 'workflow_run_id': fields.String, - 'annotation': fields.Nested(annotation_fields, allow_null=True), - 'annotation_hit_history': fields.Nested(annotation_hit_history_fields, allow_null=True), - 'created_at': TimestampField, - 'agent_thoughts': 
fields.List(fields.Nested(agent_thought_fields)), - 'message_files': fields.List(fields.Nested(message_file_fields), attribute='files'), - 'metadata': fields.Raw(attribute='message_metadata_dict'), - 'status': fields.String, - 'error': fields.String, -} - -feedback_stat_fields = { - 'like': fields.Integer, - 'dislike': fields.Integer -} + "id": fields.String, + "conversation_id": fields.String, + "inputs": fields.Raw, + "query": fields.String, + "message": fields.Raw, + "message_tokens": fields.Integer, + "answer": fields.String(attribute="re_sign_file_url_answer"), + "answer_tokens": fields.Integer, + "provider_response_latency": fields.Float, + "from_source": fields.String, + "from_end_user_id": fields.String, + "from_account_id": fields.String, + "feedbacks": fields.List(fields.Nested(feedback_fields)), + "workflow_run_id": fields.String, + "annotation": fields.Nested(annotation_fields, allow_null=True), + "annotation_hit_history": fields.Nested(annotation_hit_history_fields, allow_null=True), + "created_at": TimestampField, + "agent_thoughts": fields.List(fields.Nested(agent_thought_fields)), + "message_files": fields.List(fields.Nested(message_file_fields), attribute="files"), + "metadata": fields.Raw(attribute="message_metadata_dict"), + "status": fields.String, + "error": fields.String, +} + +feedback_stat_fields = {"like": fields.Integer, "dislike": fields.Integer} model_config_fields = { - 'opening_statement': fields.String, - 'suggested_questions': fields.Raw, - 'model': fields.Raw, - 'user_input_form': fields.Raw, - 'pre_prompt': fields.String, - 'agent_mode': fields.Raw, + "opening_statement": fields.String, + "suggested_questions": fields.Raw, + "model": fields.Raw, + "user_input_form": fields.Raw, + "pre_prompt": fields.String, + "agent_mode": fields.Raw, } simple_configs_fields = { - 'prompt_template': fields.String, + "prompt_template": fields.String, } simple_model_config_fields = { - 'model': fields.Raw(attribute='model_dict'), - 'pre_prompt': fields.String, + "model": fields.Raw(attribute="model_dict"), + "pre_prompt": fields.String, } simple_message_detail_fields = { - 'inputs': fields.Raw, - 'query': fields.String, - 'message': MessageTextField, - 'answer': fields.String, + "inputs": fields.Raw, + "query": fields.String, + "message": MessageTextField, + "answer": fields.String, } conversation_fields = { - 'id': fields.String, - 'status': fields.String, - 'from_source': fields.String, - 'from_end_user_id': fields.String, - 'from_end_user_session_id': fields.String(), - 'from_account_id': fields.String, - 'read_at': TimestampField, - 'created_at': TimestampField, - 'annotation': fields.Nested(annotation_fields, allow_null=True), - 'model_config': fields.Nested(simple_model_config_fields), - 'user_feedback_stats': fields.Nested(feedback_stat_fields), - 'admin_feedback_stats': fields.Nested(feedback_stat_fields), - 'message': fields.Nested(simple_message_detail_fields, attribute='first_message') + "id": fields.String, + "status": fields.String, + "from_source": fields.String, + "from_end_user_id": fields.String, + "from_end_user_session_id": fields.String(), + "from_account_id": fields.String, + "read_at": TimestampField, + "created_at": TimestampField, + "annotation": fields.Nested(annotation_fields, allow_null=True), + "model_config": fields.Nested(simple_model_config_fields), + "user_feedback_stats": fields.Nested(feedback_stat_fields), + "admin_feedback_stats": fields.Nested(feedback_stat_fields), + "message": fields.Nested(simple_message_detail_fields, 
attribute="first_message"), } conversation_pagination_fields = { - 'page': fields.Integer, - 'limit': fields.Integer(attribute='per_page'), - 'total': fields.Integer, - 'has_more': fields.Boolean(attribute='has_next'), - 'data': fields.List(fields.Nested(conversation_fields), attribute='items') + "page": fields.Integer, + "limit": fields.Integer(attribute="per_page"), + "total": fields.Integer, + "has_more": fields.Boolean(attribute="has_next"), + "data": fields.List(fields.Nested(conversation_fields), attribute="items"), } conversation_message_detail_fields = { - 'id': fields.String, - 'status': fields.String, - 'from_source': fields.String, - 'from_end_user_id': fields.String, - 'from_account_id': fields.String, - 'created_at': TimestampField, - 'model_config': fields.Nested(model_config_fields), - 'message': fields.Nested(message_detail_fields, attribute='first_message'), + "id": fields.String, + "status": fields.String, + "from_source": fields.String, + "from_end_user_id": fields.String, + "from_account_id": fields.String, + "created_at": TimestampField, + "model_config": fields.Nested(model_config_fields), + "message": fields.Nested(message_detail_fields, attribute="first_message"), } conversation_with_summary_fields = { - 'id': fields.String, - 'status': fields.String, - 'from_source': fields.String, - 'from_end_user_id': fields.String, - 'from_end_user_session_id': fields.String, - 'from_account_id': fields.String, - 'name': fields.String, - 'summary': fields.String(attribute='summary_or_query'), - 'read_at': TimestampField, - 'created_at': TimestampField, - 'annotated': fields.Boolean, - 'model_config': fields.Nested(simple_model_config_fields), - 'message_count': fields.Integer, - 'user_feedback_stats': fields.Nested(feedback_stat_fields), - 'admin_feedback_stats': fields.Nested(feedback_stat_fields) + "id": fields.String, + "status": fields.String, + "from_source": fields.String, + "from_end_user_id": fields.String, + "from_end_user_session_id": fields.String, + "from_account_id": fields.String, + "name": fields.String, + "summary": fields.String(attribute="summary_or_query"), + "read_at": TimestampField, + "created_at": TimestampField, + "annotated": fields.Boolean, + "model_config": fields.Nested(simple_model_config_fields), + "message_count": fields.Integer, + "user_feedback_stats": fields.Nested(feedback_stat_fields), + "admin_feedback_stats": fields.Nested(feedback_stat_fields), } conversation_with_summary_pagination_fields = { - 'page': fields.Integer, - 'limit': fields.Integer(attribute='per_page'), - 'total': fields.Integer, - 'has_more': fields.Boolean(attribute='has_next'), - 'data': fields.List(fields.Nested(conversation_with_summary_fields), attribute='items') + "page": fields.Integer, + "limit": fields.Integer(attribute="per_page"), + "total": fields.Integer, + "has_more": fields.Boolean(attribute="has_next"), + "data": fields.List(fields.Nested(conversation_with_summary_fields), attribute="items"), } conversation_detail_fields = { - 'id': fields.String, - 'status': fields.String, - 'from_source': fields.String, - 'from_end_user_id': fields.String, - 'from_account_id': fields.String, - 'created_at': TimestampField, - 'annotated': fields.Boolean, - 'introduction': fields.String, - 'model_config': fields.Nested(model_config_fields), - 'message_count': fields.Integer, - 'user_feedback_stats': fields.Nested(feedback_stat_fields), - 'admin_feedback_stats': fields.Nested(feedback_stat_fields) + "id": fields.String, + "status": fields.String, + "from_source": fields.String, 
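# ---------------------------------------------------------------------------
# [Editor's aside -- illustrative sketch only, not part of the diff under
# review.] The api/fields/* modules being reformatted here are flask_restful
# marshalling maps: each dict key names an output field, and attribute="..."
# tells marshal() which attribute of the source object to read instead. The
# repo's TimestampField lives in libs.helper; a stand-in with the same shape
# is defined below so the sketch runs on its own (Row and its values are
# invented examples):

from datetime import datetime

from flask_restful import fields, marshal

class TimestampField(fields.Raw):
    # stand-in for libs.helper.TimestampField: render datetimes as Unix time
    def format(self, value):
        return int(value.timestamp())

annotation_like_fields = {
    "answer": fields.Raw(attribute="content"),  # output "answer", read from .content
    "created_at": TimestampField,
}

class Row:
    content = "hello"
    created_at = datetime(2024, 7, 1)

print(dict(marshal(Row(), annotation_like_fields)))
# -> {'answer': 'hello', 'created_at': <Unix timestamp for 2024-07-01>}
# ---------------------------------------------------------------------------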
+ "from_end_user_id": fields.String, + "from_account_id": fields.String, + "created_at": TimestampField, + "annotated": fields.Boolean, + "introduction": fields.String, + "model_config": fields.Nested(model_config_fields), + "message_count": fields.Integer, + "user_feedback_stats": fields.Nested(feedback_stat_fields), + "admin_feedback_stats": fields.Nested(feedback_stat_fields), } simple_conversation_fields = { - 'id': fields.String, - 'name': fields.String, - 'inputs': fields.Raw, - 'status': fields.String, - 'introduction': fields.String, - 'created_at': TimestampField + "id": fields.String, + "name": fields.String, + "inputs": fields.Raw, + "status": fields.String, + "introduction": fields.String, + "created_at": TimestampField, } conversation_infinite_scroll_pagination_fields = { - 'limit': fields.Integer, - 'has_more': fields.Boolean, - 'data': fields.List(fields.Nested(simple_conversation_fields)) + "limit": fields.Integer, + "has_more": fields.Boolean, + "data": fields.List(fields.Nested(simple_conversation_fields)), } conversation_with_model_config_fields = { **simple_conversation_fields, - 'model_config': fields.Raw, + "model_config": fields.Raw, } conversation_with_model_config_infinite_scroll_pagination_fields = { - 'limit': fields.Integer, - 'has_more': fields.Boolean, - 'data': fields.List(fields.Nested(conversation_with_model_config_fields)) + "limit": fields.Integer, + "has_more": fields.Boolean, + "data": fields.List(fields.Nested(conversation_with_model_config_fields)), } diff --git a/api/fields/conversation_variable_fields.py b/api/fields/conversation_variable_fields.py index 782a848c1a43f..983e50e73ceb9 100644 --- a/api/fields/conversation_variable_fields.py +++ b/api/fields/conversation_variable_fields.py @@ -3,19 +3,19 @@ from libs.helper import TimestampField conversation_variable_fields = { - 'id': fields.String, - 'name': fields.String, - 'value_type': fields.String(attribute='value_type.value'), - 'value': fields.String, - 'description': fields.String, - 'created_at': TimestampField, - 'updated_at': TimestampField, + "id": fields.String, + "name": fields.String, + "value_type": fields.String(attribute="value_type.value"), + "value": fields.String, + "description": fields.String, + "created_at": TimestampField, + "updated_at": TimestampField, } paginated_conversation_variable_fields = { - 'page': fields.Integer, - 'limit': fields.Integer, - 'total': fields.Integer, - 'has_more': fields.Boolean, - 'data': fields.List(fields.Nested(conversation_variable_fields), attribute='data'), + "page": fields.Integer, + "limit": fields.Integer, + "total": fields.Integer, + "has_more": fields.Boolean, + "data": fields.List(fields.Nested(conversation_variable_fields), attribute="data"), } diff --git a/api/fields/data_source_fields.py b/api/fields/data_source_fields.py index 6f3c920c85b60..071071376fe6c 100644 --- a/api/fields/data_source_fields.py +++ b/api/fields/data_source_fields.py @@ -2,64 +2,56 @@ from libs.helper import TimestampField -integrate_icon_fields = { - 'type': fields.String, - 'url': fields.String, - 'emoji': fields.String -} +integrate_icon_fields = {"type": fields.String, "url": fields.String, "emoji": fields.String} integrate_page_fields = { - 'page_name': fields.String, - 'page_id': fields.String, - 'page_icon': fields.Nested(integrate_icon_fields, allow_null=True), - 'is_bound': fields.Boolean, - 'parent_id': fields.String, - 'type': fields.String + "page_name": fields.String, + "page_id": fields.String, + "page_icon": fields.Nested(integrate_icon_fields, 
+    "is_bound": fields.Boolean,
+    "parent_id": fields.String,
+    "type": fields.String,
 }

 integrate_workspace_fields = {
-    'workspace_name': fields.String,
-    'workspace_id': fields.String,
-    'workspace_icon': fields.String,
-    'pages': fields.List(fields.Nested(integrate_page_fields))
+    "workspace_name": fields.String,
+    "workspace_id": fields.String,
+    "workspace_icon": fields.String,
+    "pages": fields.List(fields.Nested(integrate_page_fields)),
 }

 integrate_notion_info_list_fields = {
-    'notion_info': fields.List(fields.Nested(integrate_workspace_fields)),
+    "notion_info": fields.List(fields.Nested(integrate_workspace_fields)),
 }

-integrate_icon_fields = {
-    'type': fields.String,
-    'url': fields.String,
-    'emoji': fields.String
-}
+integrate_icon_fields = {"type": fields.String, "url": fields.String, "emoji": fields.String}

 integrate_page_fields = {
-    'page_name': fields.String,
-    'page_id': fields.String,
-    'page_icon': fields.Nested(integrate_icon_fields, allow_null=True),
-    'parent_id': fields.String,
-    'type': fields.String
+    "page_name": fields.String,
+    "page_id": fields.String,
+    "page_icon": fields.Nested(integrate_icon_fields, allow_null=True),
+    "parent_id": fields.String,
+    "type": fields.String,
 }

 integrate_workspace_fields = {
-    'workspace_name': fields.String,
-    'workspace_id': fields.String,
-    'workspace_icon': fields.String,
-    'pages': fields.List(fields.Nested(integrate_page_fields)),
-    'total': fields.Integer
+    "workspace_name": fields.String,
+    "workspace_id": fields.String,
+    "workspace_icon": fields.String,
+    "pages": fields.List(fields.Nested(integrate_page_fields)),
+    "total": fields.Integer,
 }

 integrate_fields = {
-    'id': fields.String,
-    'provider': fields.String,
-    'created_at': TimestampField,
-    'is_bound': fields.Boolean,
-    'disabled': fields.Boolean,
-    'link': fields.String,
-    'source_info': fields.Nested(integrate_workspace_fields)
+    "id": fields.String,
+    "provider": fields.String,
+    "created_at": TimestampField,
+    "is_bound": fields.Boolean,
+    "disabled": fields.Boolean,
+    "link": fields.String,
+    "source_info": fields.Nested(integrate_workspace_fields),
 }

 integrate_list_fields = {
-    'data': fields.List(fields.Nested(integrate_fields)),
-}
\ No newline at end of file
+    "data": fields.List(fields.Nested(integrate_fields)),
+}
diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py
index a9f79b5c678e7..9cf8da7acdc98 100644
--- a/api/fields/dataset_fields.py
+++ b/api/fields/dataset_fields.py
@@ -3,73 +3,64 @@
 from libs.helper import TimestampField

 dataset_fields = {
-    'id': fields.String,
-    'name': fields.String,
-    'description': fields.String,
-    'permission': fields.String,
-    'data_source_type': fields.String,
-    'indexing_technique': fields.String,
-    'created_by': fields.String,
-    'created_at': TimestampField,
+    "id": fields.String,
+    "name": fields.String,
+    "description": fields.String,
+    "permission": fields.String,
+    "data_source_type": fields.String,
+    "indexing_technique": fields.String,
+    "created_by": fields.String,
+    "created_at": TimestampField,
 }

-reranking_model_fields = {
-    'reranking_provider_name': fields.String,
-    'reranking_model_name': fields.String
-}
+reranking_model_fields = {"reranking_provider_name": fields.String, "reranking_model_name": fields.String}

-keyword_setting_fields = {
-    'keyword_weight': fields.Float
-}
+keyword_setting_fields = {"keyword_weight": fields.Float}

 vector_setting_fields = {
-    'vector_weight': fields.Float,
-    'embedding_model_name': fields.String,
-    'embedding_provider_name': fields.String,
+    "vector_weight": fields.Float,
+    "embedding_model_name": fields.String,
+    "embedding_provider_name": fields.String,
 }

 weighted_score_fields = {
-    'keyword_setting': fields.Nested(keyword_setting_fields),
-    'vector_setting': fields.Nested(vector_setting_fields),
+    "keyword_setting": fields.Nested(keyword_setting_fields),
+    "vector_setting": fields.Nested(vector_setting_fields),
 }

 dataset_retrieval_model_fields = {
-    'search_method': fields.String,
-    'reranking_enable': fields.Boolean,
-    'reranking_mode': fields.String,
-    'reranking_model': fields.Nested(reranking_model_fields),
-    'weights': fields.Nested(weighted_score_fields, allow_null=True),
-    'top_k': fields.Integer,
-    'score_threshold_enabled': fields.Boolean,
-    'score_threshold': fields.Float
+    "search_method": fields.String,
+    "reranking_enable": fields.Boolean,
+    "reranking_mode": fields.String,
+    "reranking_model": fields.Nested(reranking_model_fields),
+    "weights": fields.Nested(weighted_score_fields, allow_null=True),
+    "top_k": fields.Integer,
+    "score_threshold_enabled": fields.Boolean,
+    "score_threshold": fields.Float,
 }

-tag_fields = {
-    'id': fields.String,
-    'name': fields.String,
-    'type': fields.String
-}
+tag_fields = {"id": fields.String, "name": fields.String, "type": fields.String}

 dataset_detail_fields = {
-    'id': fields.String,
-    'name': fields.String,
-    'description': fields.String,
-    'provider': fields.String,
-    'permission': fields.String,
-    'data_source_type': fields.String,
-    'indexing_technique': fields.String,
-    'app_count': fields.Integer,
-    'document_count': fields.Integer,
-    'word_count': fields.Integer,
-    'created_by': fields.String,
-    'created_at': TimestampField,
-    'updated_by': fields.String,
-    'updated_at': TimestampField,
-    'embedding_model': fields.String,
-    'embedding_model_provider': fields.String,
-    'embedding_available': fields.Boolean,
-    'retrieval_model_dict': fields.Nested(dataset_retrieval_model_fields),
-    'tags': fields.List(fields.Nested(tag_fields))
+    "id": fields.String,
+    "name": fields.String,
+    "description": fields.String,
+    "provider": fields.String,
+    "permission": fields.String,
+    "data_source_type": fields.String,
+    "indexing_technique": fields.String,
+    "app_count": fields.Integer,
+    "document_count": fields.Integer,
+    "word_count": fields.Integer,
+    "created_by": fields.String,
+    "created_at": TimestampField,
+    "updated_by": fields.String,
+    "updated_at": TimestampField,
+    "embedding_model": fields.String,
+    "embedding_model_provider": fields.String,
+    "embedding_available": fields.Boolean,
+    "retrieval_model_dict": fields.Nested(dataset_retrieval_model_fields),
+    "tags": fields.List(fields.Nested(tag_fields)),
 }

 dataset_query_detail_fields = {
@@ -79,7 +70,5 @@
     "source_app_id": fields.String,
     "created_by_role": fields.String,
     "created_by": fields.String,
-    "created_at": TimestampField
+    "created_at": TimestampField,
 }
-
-
diff --git a/api/fields/document_fields.py b/api/fields/document_fields.py
index e8215255b35d5..a83ec7bc97ade 100644
--- a/api/fields/document_fields.py
+++ b/api/fields/document_fields.py
@@ -4,75 +4,73 @@
 from libs.helper import TimestampField

 document_fields = {
-    'id': fields.String,
-    'position': fields.Integer,
-    'data_source_type': fields.String,
-    'data_source_info': fields.Raw(attribute='data_source_info_dict'),
-    'data_source_detail_dict': fields.Raw(attribute='data_source_detail_dict'),
-    'dataset_process_rule_id': fields.String,
-    'name': fields.String,
-    'created_from': fields.String,
-    'created_by': fields.String,
-    'created_at': TimestampField,
-    'tokens': fields.Integer,
-    'indexing_status': fields.String,
-    'error': fields.String,
-    'enabled': fields.Boolean,
-    'disabled_at': TimestampField,
-    'disabled_by': fields.String,
-    'archived': fields.Boolean,
-    'display_status': fields.String,
-    'word_count': fields.Integer,
-    'hit_count': fields.Integer,
-    'doc_form': fields.String,
+    "id": fields.String,
+    "position": fields.Integer,
+    "data_source_type": fields.String,
+    "data_source_info": fields.Raw(attribute="data_source_info_dict"),
+    "data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"),
+    "dataset_process_rule_id": fields.String,
+    "name": fields.String,
+    "created_from": fields.String,
+    "created_by": fields.String,
+    "created_at": TimestampField,
+    "tokens": fields.Integer,
+    "indexing_status": fields.String,
+    "error": fields.String,
+    "enabled": fields.Boolean,
+    "disabled_at": TimestampField,
+    "disabled_by": fields.String,
+    "archived": fields.Boolean,
+    "display_status": fields.String,
+    "word_count": fields.Integer,
+    "hit_count": fields.Integer,
+    "doc_form": fields.String,
 }

 document_with_segments_fields = {
-    'id': fields.String,
-    'position': fields.Integer,
-    'data_source_type': fields.String,
-    'data_source_info': fields.Raw(attribute='data_source_info_dict'),
-    'data_source_detail_dict': fields.Raw(attribute='data_source_detail_dict'),
-    'dataset_process_rule_id': fields.String,
-    'name': fields.String,
-    'created_from': fields.String,
-    'created_by': fields.String,
-    'created_at': TimestampField,
-    'tokens': fields.Integer,
-    'indexing_status': fields.String,
-    'error': fields.String,
-    'enabled': fields.Boolean,
-    'disabled_at': TimestampField,
-    'disabled_by': fields.String,
-    'archived': fields.Boolean,
-    'display_status': fields.String,
-    'word_count': fields.Integer,
-    'hit_count': fields.Integer,
-    'completed_segments': fields.Integer,
-    'total_segments': fields.Integer
+    "id": fields.String,
+    "position": fields.Integer,
+    "data_source_type": fields.String,
+    "data_source_info": fields.Raw(attribute="data_source_info_dict"),
+    "data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"),
+    "dataset_process_rule_id": fields.String,
+    "name": fields.String,
+    "created_from": fields.String,
+    "created_by": fields.String,
+    "created_at": TimestampField,
+    "tokens": fields.Integer,
+    "indexing_status": fields.String,
+    "error": fields.String,
+    "enabled": fields.Boolean,
+    "disabled_at": TimestampField,
+    "disabled_by": fields.String,
+    "archived": fields.Boolean,
+    "display_status": fields.String,
+    "word_count": fields.Integer,
+    "hit_count": fields.Integer,
+    "completed_segments": fields.Integer,
+    "total_segments": fields.Integer,
 }

 dataset_and_document_fields = {
-    'dataset': fields.Nested(dataset_fields),
-    'documents': fields.List(fields.Nested(document_fields)),
-    'batch': fields.String
+    "dataset": fields.Nested(dataset_fields),
+    "documents": fields.List(fields.Nested(document_fields)),
+    "batch": fields.String,
 }

 document_status_fields = {
-    'id': fields.String,
-    'indexing_status': fields.String,
-    'processing_started_at': TimestampField,
-    'parsing_completed_at': TimestampField,
-    'cleaning_completed_at': TimestampField,
-    'splitting_completed_at': TimestampField,
-    'completed_at': TimestampField,
-    'paused_at': TimestampField,
-    'error': fields.String,
-    'stopped_at': TimestampField,
-    'completed_segments': fields.Integer,
-    'total_segments': fields.Integer,
+    "id": fields.String,
+    "indexing_status": fields.String,
"processing_started_at": TimestampField, + "parsing_completed_at": TimestampField, + "cleaning_completed_at": TimestampField, + "splitting_completed_at": TimestampField, + "completed_at": TimestampField, + "paused_at": TimestampField, + "error": fields.String, + "stopped_at": TimestampField, + "completed_segments": fields.Integer, + "total_segments": fields.Integer, } -document_status_fields_list = { - 'data': fields.List(fields.Nested(document_status_fields)) -} \ No newline at end of file +document_status_fields_list = {"data": fields.List(fields.Nested(document_status_fields))} diff --git a/api/fields/end_user_fields.py b/api/fields/end_user_fields.py index ee630c12c2e9a..99e529f9d1c07 100644 --- a/api/fields/end_user_fields.py +++ b/api/fields/end_user_fields.py @@ -1,8 +1,8 @@ from flask_restful import fields simple_end_user_fields = { - 'id': fields.String, - 'type': fields.String, - 'is_anonymous': fields.Boolean, - 'session_id': fields.String, + "id": fields.String, + "type": fields.String, + "is_anonymous": fields.Boolean, + "session_id": fields.String, } diff --git a/api/fields/file_fields.py b/api/fields/file_fields.py index 2ef379dabc0d0..e5a03ce77ed5f 100644 --- a/api/fields/file_fields.py +++ b/api/fields/file_fields.py @@ -3,17 +3,17 @@ from libs.helper import TimestampField upload_config_fields = { - 'file_size_limit': fields.Integer, - 'batch_count_limit': fields.Integer, - 'image_file_size_limit': fields.Integer, + "file_size_limit": fields.Integer, + "batch_count_limit": fields.Integer, + "image_file_size_limit": fields.Integer, } file_fields = { - 'id': fields.String, - 'name': fields.String, - 'size': fields.Integer, - 'extension': fields.String, - 'mime_type': fields.String, - 'created_by': fields.String, - 'created_at': TimestampField, -} \ No newline at end of file + "id": fields.String, + "name": fields.String, + "size": fields.Integer, + "extension": fields.String, + "mime_type": fields.String, + "created_by": fields.String, + "created_at": TimestampField, +} diff --git a/api/fields/hit_testing_fields.py b/api/fields/hit_testing_fields.py index 541e56a378dae..f36e80f8d493d 100644 --- a/api/fields/hit_testing_fields.py +++ b/api/fields/hit_testing_fields.py @@ -3,39 +3,39 @@ from libs.helper import TimestampField document_fields = { - 'id': fields.String, - 'data_source_type': fields.String, - 'name': fields.String, - 'doc_type': fields.String, + "id": fields.String, + "data_source_type": fields.String, + "name": fields.String, + "doc_type": fields.String, } segment_fields = { - 'id': fields.String, - 'position': fields.Integer, - 'document_id': fields.String, - 'content': fields.String, - 'answer': fields.String, - 'word_count': fields.Integer, - 'tokens': fields.Integer, - 'keywords': fields.List(fields.String), - 'index_node_id': fields.String, - 'index_node_hash': fields.String, - 'hit_count': fields.Integer, - 'enabled': fields.Boolean, - 'disabled_at': TimestampField, - 'disabled_by': fields.String, - 'status': fields.String, - 'created_by': fields.String, - 'created_at': TimestampField, - 'indexing_at': TimestampField, - 'completed_at': TimestampField, - 'error': fields.String, - 'stopped_at': TimestampField, - 'document': fields.Nested(document_fields), + "id": fields.String, + "position": fields.Integer, + "document_id": fields.String, + "content": fields.String, + "answer": fields.String, + "word_count": fields.Integer, + "tokens": fields.Integer, + "keywords": fields.List(fields.String), + "index_node_id": fields.String, + "index_node_hash": 
+    "hit_count": fields.Integer,
+    "enabled": fields.Boolean,
+    "disabled_at": TimestampField,
+    "disabled_by": fields.String,
+    "status": fields.String,
+    "created_by": fields.String,
+    "created_at": TimestampField,
+    "indexing_at": TimestampField,
+    "completed_at": TimestampField,
+    "error": fields.String,
+    "stopped_at": TimestampField,
+    "document": fields.Nested(document_fields),
 }

 hit_testing_record_fields = {
-    'segment': fields.Nested(segment_fields),
-    'score': fields.Float,
-    'tsne_position': fields.Raw
-}
\ No newline at end of file
+    "segment": fields.Nested(segment_fields),
+    "score": fields.Float,
+    "tsne_position": fields.Raw,
+}
diff --git a/api/fields/installed_app_fields.py b/api/fields/installed_app_fields.py
index 35cc5a64755ec..b87cc653240a7 100644
--- a/api/fields/installed_app_fields.py
+++ b/api/fields/installed_app_fields.py
@@ -3,23 +3,21 @@
 from libs.helper import TimestampField

 app_fields = {
-    'id': fields.String,
-    'name': fields.String,
-    'mode': fields.String,
-    'icon': fields.String,
-    'icon_background': fields.String
+    "id": fields.String,
+    "name": fields.String,
+    "mode": fields.String,
+    "icon": fields.String,
+    "icon_background": fields.String,
 }

 installed_app_fields = {
-    'id': fields.String,
-    'app': fields.Nested(app_fields),
-    'app_owner_tenant_id': fields.String,
-    'is_pinned': fields.Boolean,
-    'last_used_at': TimestampField,
-    'editable': fields.Boolean,
-    'uninstallable': fields.Boolean
+    "id": fields.String,
+    "app": fields.Nested(app_fields),
+    "app_owner_tenant_id": fields.String,
+    "is_pinned": fields.Boolean,
+    "last_used_at": TimestampField,
+    "editable": fields.Boolean,
+    "uninstallable": fields.Boolean,
 }

-installed_app_list_fields = {
-    'installed_apps': fields.List(fields.Nested(installed_app_fields))
-}
\ No newline at end of file
+installed_app_list_fields = {"installed_apps": fields.List(fields.Nested(installed_app_fields))}
diff --git a/api/fields/member_fields.py b/api/fields/member_fields.py
index d061b59c34702..1cf8e408d13d3 100644
--- a/api/fields/member_fields.py
+++ b/api/fields/member_fields.py
@@ -2,38 +2,32 @@
 from libs.helper import TimestampField

-simple_account_fields = {
-    'id': fields.String,
-    'name': fields.String,
-    'email': fields.String
-}
+simple_account_fields = {"id": fields.String, "name": fields.String, "email": fields.String}

 account_fields = {
-    'id': fields.String,
-    'name': fields.String,
-    'avatar': fields.String,
-    'email': fields.String,
-    'is_password_set': fields.Boolean,
-    'interface_language': fields.String,
-    'interface_theme': fields.String,
-    'timezone': fields.String,
-    'last_login_at': TimestampField,
-    'last_login_ip': fields.String,
-    'created_at': TimestampField
+    "id": fields.String,
+    "name": fields.String,
+    "avatar": fields.String,
+    "email": fields.String,
+    "is_password_set": fields.Boolean,
+    "interface_language": fields.String,
+    "interface_theme": fields.String,
+    "timezone": fields.String,
+    "last_login_at": TimestampField,
+    "last_login_ip": fields.String,
+    "created_at": TimestampField,
 }

 account_with_role_fields = {
-    'id': fields.String,
-    'name': fields.String,
-    'avatar': fields.String,
-    'email': fields.String,
-    'last_login_at': TimestampField,
-    'last_active_at': TimestampField,
-    'created_at': TimestampField,
-    'role': fields.String,
-    'status': fields.String,
+    "id": fields.String,
+    "name": fields.String,
+    "avatar": fields.String,
+    "email": fields.String,
+    "last_login_at": TimestampField,
+    "last_active_at": TimestampField,
+    "created_at": TimestampField,
+    "role": fields.String,
+    "status": fields.String,
 }

-account_with_role_list_fields = {
-    'accounts': fields.List(fields.Nested(account_with_role_fields))
-}
+account_with_role_list_fields = {"accounts": fields.List(fields.Nested(account_with_role_fields))}
diff --git a/api/fields/message_fields.py b/api/fields/message_fields.py
index 3116843589242..3d2df87afb9b1 100644
--- a/api/fields/message_fields.py
+++ b/api/fields/message_fields.py
@@ -3,83 +3,79 @@
 from fields.conversation_fields import message_file_fields
 from libs.helper import TimestampField

-feedback_fields = {
-    'rating': fields.String
-}
+feedback_fields = {"rating": fields.String}

 retriever_resource_fields = {
-    'id': fields.String,
-    'message_id': fields.String,
-    'position': fields.Integer,
-    'dataset_id': fields.String,
-    'dataset_name': fields.String,
-    'document_id': fields.String,
-    'document_name': fields.String,
-    'data_source_type': fields.String,
-    'segment_id': fields.String,
-    'score': fields.Float,
-    'hit_count': fields.Integer,
-    'word_count': fields.Integer,
-    'segment_position': fields.Integer,
-    'index_node_hash': fields.String,
-    'content': fields.String,
-    'created_at': TimestampField
+    "id": fields.String,
+    "message_id": fields.String,
+    "position": fields.Integer,
+    "dataset_id": fields.String,
+    "dataset_name": fields.String,
+    "document_id": fields.String,
+    "document_name": fields.String,
+    "data_source_type": fields.String,
+    "segment_id": fields.String,
+    "score": fields.Float,
+    "hit_count": fields.Integer,
+    "word_count": fields.Integer,
+    "segment_position": fields.Integer,
+    "index_node_hash": fields.String,
+    "content": fields.String,
+    "created_at": TimestampField,
 }

-feedback_fields = {
-    'rating': fields.String
-}
+feedback_fields = {"rating": fields.String}

 agent_thought_fields = {
-    'id': fields.String,
-    'chain_id': fields.String,
-    'message_id': fields.String,
-    'position': fields.Integer,
-    'thought': fields.String,
-    'tool': fields.String,
-    'tool_labels': fields.Raw,
-    'tool_input': fields.String,
-    'created_at': TimestampField,
-    'observation': fields.String,
-    'files': fields.List(fields.String)
+    "id": fields.String,
+    "chain_id": fields.String,
+    "message_id": fields.String,
+    "position": fields.Integer,
+    "thought": fields.String,
+    "tool": fields.String,
+    "tool_labels": fields.Raw,
+    "tool_input": fields.String,
+    "created_at": TimestampField,
+    "observation": fields.String,
+    "files": fields.List(fields.String),
 }

 retriever_resource_fields = {
-    'id': fields.String,
-    'message_id': fields.String,
-    'position': fields.Integer,
-    'dataset_id': fields.String,
-    'dataset_name': fields.String,
-    'document_id': fields.String,
-    'document_name': fields.String,
-    'data_source_type': fields.String,
-    'segment_id': fields.String,
-    'score': fields.Float,
-    'hit_count': fields.Integer,
-    'word_count': fields.Integer,
-    'segment_position': fields.Integer,
-    'index_node_hash': fields.String,
-    'content': fields.String,
-    'created_at': TimestampField
+    "id": fields.String,
+    "message_id": fields.String,
+    "position": fields.Integer,
+    "dataset_id": fields.String,
+    "dataset_name": fields.String,
+    "document_id": fields.String,
+    "document_name": fields.String,
+    "data_source_type": fields.String,
+    "segment_id": fields.String,
+    "score": fields.Float,
+    "hit_count": fields.Integer,
+    "word_count": fields.Integer,
+    "segment_position": fields.Integer,
+    "index_node_hash": fields.String,
+    "content": fields.String,
+    "created_at": TimestampField,
 }

 message_fields = {
-    'id': fields.String,
-    'conversation_id': fields.String,
-    'inputs': fields.Raw,
-    'query': fields.String,
-    'answer': fields.String(attribute='re_sign_file_url_answer'),
-    'feedback': fields.Nested(feedback_fields, attribute='user_feedback', allow_null=True),
-    'retriever_resources': fields.List(fields.Nested(retriever_resource_fields)),
-    'created_at': TimestampField,
-    'agent_thoughts': fields.List(fields.Nested(agent_thought_fields)),
-    'message_files': fields.List(fields.Nested(message_file_fields), attribute='files'),
-    'status': fields.String,
-    'error': fields.String,
+    "id": fields.String,
+    "conversation_id": fields.String,
+    "inputs": fields.Raw,
+    "query": fields.String,
+    "answer": fields.String(attribute="re_sign_file_url_answer"),
+    "feedback": fields.Nested(feedback_fields, attribute="user_feedback", allow_null=True),
+    "retriever_resources": fields.List(fields.Nested(retriever_resource_fields)),
+    "created_at": TimestampField,
+    "agent_thoughts": fields.List(fields.Nested(agent_thought_fields)),
+    "message_files": fields.List(fields.Nested(message_file_fields), attribute="files"),
+    "status": fields.String,
+    "error": fields.String,
 }

 message_infinite_scroll_pagination_fields = {
-    'limit': fields.Integer,
-    'has_more': fields.Boolean,
-    'data': fields.List(fields.Nested(message_fields))
+    "limit": fields.Integer,
+    "has_more": fields.Boolean,
+    "data": fields.List(fields.Nested(message_fields)),
 }
diff --git a/api/fields/segment_fields.py b/api/fields/segment_fields.py
index e41d1a53dd023..2dd4cb45be409 100644
--- a/api/fields/segment_fields.py
+++ b/api/fields/segment_fields.py
@@ -3,31 +3,31 @@
 from libs.helper import TimestampField

 segment_fields = {
-    'id': fields.String,
-    'position': fields.Integer,
-    'document_id': fields.String,
-    'content': fields.String,
-    'answer': fields.String,
-    'word_count': fields.Integer,
-    'tokens': fields.Integer,
-    'keywords': fields.List(fields.String),
-    'index_node_id': fields.String,
-    'index_node_hash': fields.String,
-    'hit_count': fields.Integer,
-    'enabled': fields.Boolean,
-    'disabled_at': TimestampField,
-    'disabled_by': fields.String,
-    'status': fields.String,
-    'created_by': fields.String,
-    'created_at': TimestampField,
-    'indexing_at': TimestampField,
-    'completed_at': TimestampField,
-    'error': fields.String,
-    'stopped_at': TimestampField
+    "id": fields.String,
+    "position": fields.Integer,
+    "document_id": fields.String,
+    "content": fields.String,
+    "answer": fields.String,
+    "word_count": fields.Integer,
+    "tokens": fields.Integer,
+    "keywords": fields.List(fields.String),
+    "index_node_id": fields.String,
+    "index_node_hash": fields.String,
+    "hit_count": fields.Integer,
+    "enabled": fields.Boolean,
+    "disabled_at": TimestampField,
+    "disabled_by": fields.String,
+    "status": fields.String,
+    "created_by": fields.String,
+    "created_at": TimestampField,
+    "indexing_at": TimestampField,
+    "completed_at": TimestampField,
+    "error": fields.String,
+    "stopped_at": TimestampField,
 }

 segment_list_response = {
-    'data': fields.List(fields.Nested(segment_fields)),
-    'has_more': fields.Boolean,
-    'limit': fields.Integer
+    "data": fields.List(fields.Nested(segment_fields)),
+    "has_more": fields.Boolean,
+    "limit": fields.Integer,
 }
diff --git a/api/fields/tag_fields.py b/api/fields/tag_fields.py
index f7e030b738e53..9af4fc57dd061 100644
--- a/api/fields/tag_fields.py
+++ b/api/fields/tag_fields.py
@@ -1,8 +1,3 @@
 from flask_restful import fields

-tag_fields = {
-    'id': fields.String,
-    'name': fields.String,
-    'type': fields.String,
-    'binding_count': fields.String
-}
\ No newline at end of file
+tag_fields = {"id": fields.String, "name": fields.String, "type": fields.String, "binding_count": fields.String}
diff --git a/api/fields/workflow_app_log_fields.py b/api/fields/workflow_app_log_fields.py
index e230c159fba59..a53b54624915c 100644
--- a/api/fields/workflow_app_log_fields.py
+++ b/api/fields/workflow_app_log_fields.py
@@ -7,18 +7,18 @@
 workflow_app_log_partial_fields = {
     "id": fields.String,
-    "workflow_run": fields.Nested(workflow_run_for_log_fields, attribute='workflow_run', allow_null=True),
+    "workflow_run": fields.Nested(workflow_run_for_log_fields, attribute="workflow_run", allow_null=True),
     "created_from": fields.String,
     "created_by_role": fields.String,
-    "created_by_account": fields.Nested(simple_account_fields, attribute='created_by_account', allow_null=True),
-    "created_by_end_user": fields.Nested(simple_end_user_fields, attribute='created_by_end_user', allow_null=True),
-    "created_at": TimestampField
+    "created_by_account": fields.Nested(simple_account_fields, attribute="created_by_account", allow_null=True),
+    "created_by_end_user": fields.Nested(simple_end_user_fields, attribute="created_by_end_user", allow_null=True),
+    "created_at": TimestampField,
 }

 workflow_app_log_pagination_fields = {
-    'page': fields.Integer,
-    'limit': fields.Integer(attribute='per_page'),
-    'total': fields.Integer,
-    'has_more': fields.Boolean(attribute='has_next'),
-    'data': fields.List(fields.Nested(workflow_app_log_partial_fields), attribute='items')
+    "page": fields.Integer,
+    "limit": fields.Integer(attribute="per_page"),
+    "total": fields.Integer,
+    "has_more": fields.Boolean(attribute="has_next"),
+    "data": fields.List(fields.Nested(workflow_app_log_partial_fields), attribute="items"),
 }
diff --git a/api/fields/workflow_fields.py b/api/fields/workflow_fields.py
index c1dd0e184a797..240b8f2eb03e7 100644
--- a/api/fields/workflow_fields.py
+++ b/api/fields/workflow_fields.py
@@ -13,43 +13,43 @@ def format(self, value):
         # Mask secret variables values in environment_variables
         if isinstance(value, SecretVariable):
             return {
-                'id': value.id,
-                'name': value.name,
-                'value': encrypter.obfuscated_token(value.value),
-                'value_type': value.value_type.value,
+                "id": value.id,
+                "name": value.name,
+                "value": encrypter.obfuscated_token(value.value),
+                "value_type": value.value_type.value,
             }

         if isinstance(value, Variable):
             return {
-                'id': value.id,
-                'name': value.name,
-                'value': value.value,
-                'value_type': value.value_type.value,
+                "id": value.id,
+                "name": value.name,
+                "value": value.value,
+                "value_type": value.value_type.value,
             }

         if isinstance(value, dict):
-            value_type = value.get('value_type')
+            value_type = value.get("value_type")
             if value_type not in ENVIRONMENT_VARIABLE_SUPPORTED_TYPES:
-                raise ValueError(f'Unsupported environment variable value type: {value_type}')
+                raise ValueError(f"Unsupported environment variable value type: {value_type}")
             return value

 conversation_variable_fields = {
-    'id': fields.String,
-    'name': fields.String,
-    'value_type': fields.String(attribute='value_type.value'),
-    'value': fields.Raw,
-    'description': fields.String,
+    "id": fields.String,
+    "name": fields.String,
+    "value_type": fields.String(attribute="value_type.value"),
+    "value": fields.Raw,
+    "description": fields.String,
 }

 workflow_fields = {
-    'id': fields.String,
-    'graph': fields.Raw(attribute='graph_dict'),
-    'features': fields.Raw(attribute='features_dict'),
-    'hash': fields.String(attribute='unique_hash'),
-    'created_by': fields.Nested(simple_account_fields, attribute='created_by_account'),
-    'created_at': TimestampField,
-    'updated_by': fields.Nested(simple_account_fields, attribute='updated_by_account', allow_null=True),
-    'updated_at': TimestampField,
-    'tool_published': fields.Boolean,
-    'environment_variables': fields.List(EnvironmentVariableField()),
-    'conversation_variables': fields.List(fields.Nested(conversation_variable_fields)),
+    "id": fields.String,
+    "graph": fields.Raw(attribute="graph_dict"),
+    "features": fields.Raw(attribute="features_dict"),
+    "hash": fields.String(attribute="unique_hash"),
+    "created_by": fields.Nested(simple_account_fields, attribute="created_by_account"),
+    "created_at": TimestampField,
+    "updated_by": fields.Nested(simple_account_fields, attribute="updated_by_account", allow_null=True),
+    "updated_at": TimestampField,
+    "tool_published": fields.Boolean,
+    "environment_variables": fields.List(EnvironmentVariableField()),
+    "conversation_variables": fields.List(fields.Nested(conversation_variable_fields)),
 }
diff --git a/api/fields/workflow_run_fields.py b/api/fields/workflow_run_fields.py
index 3e798473cd048..1413adf719687 100644
--- a/api/fields/workflow_run_fields.py
+++ b/api/fields/workflow_run_fields.py
@@ -13,7 +13,7 @@
     "total_tokens": fields.Integer,
     "total_steps": fields.Integer,
     "created_at": TimestampField,
-    "finished_at": TimestampField
+    "finished_at": TimestampField,
 }

 workflow_run_for_list_fields = {
@@ -24,9 +24,9 @@
     "elapsed_time": fields.Float,
     "total_tokens": fields.Integer,
     "total_steps": fields.Integer,
-    "created_by_account": fields.Nested(simple_account_fields, attribute='created_by_account', allow_null=True),
+    "created_by_account": fields.Nested(simple_account_fields, attribute="created_by_account", allow_null=True),
     "created_at": TimestampField,
-    "finished_at": TimestampField
+    "finished_at": TimestampField,
 }

 advanced_chat_workflow_run_for_list_fields = {
@@ -39,40 +39,40 @@
     "elapsed_time": fields.Float,
     "total_tokens": fields.Integer,
     "total_steps": fields.Integer,
-    "created_by_account": fields.Nested(simple_account_fields, attribute='created_by_account', allow_null=True),
+    "created_by_account": fields.Nested(simple_account_fields, attribute="created_by_account", allow_null=True),
     "created_at": TimestampField,
-    "finished_at": TimestampField
+    "finished_at": TimestampField,
 }

 advanced_chat_workflow_run_pagination_fields = {
-    'limit': fields.Integer(attribute='limit'),
-    'has_more': fields.Boolean(attribute='has_more'),
-    'data': fields.List(fields.Nested(advanced_chat_workflow_run_for_list_fields), attribute='data')
+    "limit": fields.Integer(attribute="limit"),
+    "has_more": fields.Boolean(attribute="has_more"),
+    "data": fields.List(fields.Nested(advanced_chat_workflow_run_for_list_fields), attribute="data"),
 }

 workflow_run_pagination_fields = {
-    'limit': fields.Integer(attribute='limit'),
-    'has_more': fields.Boolean(attribute='has_more'),
-    'data': fields.List(fields.Nested(workflow_run_for_list_fields), attribute='data')
+    "limit": fields.Integer(attribute="limit"),
+    "has_more": fields.Boolean(attribute="has_more"),
+    "data": fields.List(fields.Nested(workflow_run_for_list_fields), attribute="data"),
 }

 workflow_run_detail_fields = {
     "id": fields.String,
     "sequence_number": fields.Integer,
     "version": fields.String,
-    "graph": fields.Raw(attribute='graph_dict'),
-    "inputs": fields.Raw(attribute='inputs_dict'),
+    "graph": fields.Raw(attribute="graph_dict"),
+    "inputs": fields.Raw(attribute="inputs_dict"),
     "status": fields.String,
-    "outputs": fields.Raw(attribute='outputs_dict'),
+    "outputs": fields.Raw(attribute="outputs_dict"),
     "error": fields.String,
     "elapsed_time": fields.Float,
     "total_tokens": fields.Integer,
     "total_steps": fields.Integer,
     "created_by_role": fields.String,
-    "created_by_account": fields.Nested(simple_account_fields, attribute='created_by_account', allow_null=True),
-    "created_by_end_user": fields.Nested(simple_end_user_fields, attribute='created_by_end_user', allow_null=True),
+    "created_by_account": fields.Nested(simple_account_fields, attribute="created_by_account", allow_null=True),
+    "created_by_end_user": fields.Nested(simple_end_user_fields, attribute="created_by_end_user", allow_null=True),
     "created_at": TimestampField,
-    "finished_at": TimestampField
+    "finished_at": TimestampField,
 }

 workflow_run_node_execution_fields = {
@@ -82,21 +82,21 @@
     "node_id": fields.String,
     "node_type": fields.String,
     "title": fields.String,
-    "inputs": fields.Raw(attribute='inputs_dict'),
-    "process_data": fields.Raw(attribute='process_data_dict'),
-    "outputs": fields.Raw(attribute='outputs_dict'),
+    "inputs": fields.Raw(attribute="inputs_dict"),
+    "process_data": fields.Raw(attribute="process_data_dict"),
+    "outputs": fields.Raw(attribute="outputs_dict"),
     "status": fields.String,
     "error": fields.String,
     "elapsed_time": fields.Float,
-    "execution_metadata": fields.Raw(attribute='execution_metadata_dict'),
+    "execution_metadata": fields.Raw(attribute="execution_metadata_dict"),
     "extras": fields.Raw,
     "created_at": TimestampField,
     "created_by_role": fields.String,
-    "created_by_account": fields.Nested(simple_account_fields, attribute='created_by_account', allow_null=True),
-    "created_by_end_user": fields.Nested(simple_end_user_fields, attribute='created_by_end_user', allow_null=True),
-    "finished_at": TimestampField
+    "created_by_account": fields.Nested(simple_account_fields, attribute="created_by_account", allow_null=True),
+    "created_by_end_user": fields.Nested(simple_end_user_fields, attribute="created_by_end_user", allow_null=True),
+    "finished_at": TimestampField,
 }

 workflow_run_node_execution_list_fields = {
-    'data': fields.List(fields.Nested(workflow_run_node_execution_fields)),
+    "data": fields.List(fields.Nested(workflow_run_node_execution_fields)),
 }
diff --git a/api/migrations/versions/2024_08_14_1354-8782057ff0dc_add_conversations_dialogue_count.py b/api/migrations/versions/2024_08_14_1354-8782057ff0dc_add_conversations_dialogue_count.py
new file mode 100644
index 0000000000000..eba78e2e77d5d
--- /dev/null
+++ b/api/migrations/versions/2024_08_14_1354-8782057ff0dc_add_conversations_dialogue_count.py
@@ -0,0 +1,33 @@
+"""add conversations.dialogue_count
+
+Revision ID: 8782057ff0dc
+Revises: 63a83fcf12ba
+Create Date: 2024-08-14 13:54:25.161324
+
+"""
+import sqlalchemy as sa
+from alembic import op
+
+import models as models
+
+# revision identifiers, used by Alembic.
+revision = '8782057ff0dc'
+down_revision = '63a83fcf12ba'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('conversations', schema=None) as batch_op:
+        batch_op.add_column(sa.Column('dialogue_count', sa.Integer(), server_default='0', nullable=False))
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('conversations', schema=None) as batch_op:
+        batch_op.drop_column('dialogue_count')
+
+    # ### end Alembic commands ###
diff --git a/api/models/__init__.py b/api/models/__init__.py
index f831356841629..4012611471c33 100644
--- a/api/models/__init__.py
+++ b/api/models/__init__.py
@@ -1,10 +1,10 @@
 from enum import Enum

-from .model import AppMode
+from .model import App, AppMode, Message
 from .types import StringUUID
-from .workflow import ConversationVariable, WorkflowNodeExecutionStatus
+from .workflow import ConversationVariable, Workflow, WorkflowNodeExecutionStatus

-__all__ = ['ConversationVariable', 'StringUUID', 'AppMode', 'WorkflowNodeExecutionStatus']
+__all__ = ['ConversationVariable', 'StringUUID', 'AppMode', 'WorkflowNodeExecutionStatus', 'Workflow', 'App', 'Message']

 class CreatedByRole(Enum):
diff --git a/api/models/model.py b/api/models/model.py
index 9909b10dc0952..5426d3bc83e02 100644
--- a/api/models/model.py
+++ b/api/models/model.py
@@ -7,6 +7,7 @@
 from flask import request
 from flask_login import UserMixin
 from sqlalchemy import Float, func, text
+from sqlalchemy.orm import Mapped, mapped_column

 from configs import dify_config
 from core.file.tool_file_parser import ToolFileParser
@@ -512,12 +513,12 @@ class Conversation(db.Model):
     from_account_id = db.Column(StringUUID)
     read_at = db.Column(db.DateTime)
     read_account_id = db.Column(StringUUID)
+    dialogue_count: Mapped[int] = mapped_column(default=0)
     created_at = db.Column(db.DateTime, nullable=False, server_default=db.text('CURRENT_TIMESTAMP(0)'))
     updated_at = db.Column(db.DateTime, nullable=False, server_default=db.text('CURRENT_TIMESTAMP(0)'))

     messages = db.relationship("Message", backref="conversation", lazy='select', passive_deletes="all")
-    message_annotations = db.relationship("MessageAnnotation", backref="conversation", lazy='select',
-                                          passive_deletes="all")
+    message_annotations = db.relationship("MessageAnnotation", backref="conversation", lazy='select', passive_deletes="all")

     is_deleted = db.Column(db.Boolean, nullable=False, server_default=db.text('false'))
diff --git a/api/pyproject.toml b/api/pyproject.toml
index 82e2aaeb2b382..3e107f5e9b0bc 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -69,7 +69,18 @@ ignore = [
 ]

 [tool.ruff.format]
-quote-style = "single"
+exclude = [
+    "core/**/*.py",
+    "controllers/**/*.py",
+    "models/**/*.py",
+    "utils/**/*.py",
+    "migrations/**/*",
+    "services/**/*.py",
+    "tasks/**/*.py",
+    "tests/**/*.py",
+    "libs/**/*.py",
+    "configs/**/*.py",
+]

 [tool.pytest_env]
 OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii"
diff --git a/api/schedule/clean_embedding_cache_task.py b/api/schedule/clean_embedding_cache_task.py
index ccc1062266a02..67d070682867b 100644
--- a/api/schedule/clean_embedding_cache_task.py
+++ b/api/schedule/clean_embedding_cache_task.py
@@ -11,27 +11,32 @@
 from models.dataset import Embedding

-@app.celery.task(queue='dataset')
+@app.celery.task(queue="dataset")
 def clean_embedding_cache_task():
-    click.echo(click.style('Start clean embedding cache.', fg='green'))
+    click.echo(click.style("Start clean embedding cache.", fg="green"))
     clean_days = int(dify_config.CLEAN_DAY_SETTING)
     start_at = time.perf_counter()
     thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
     while True:
         try:
-            embedding_ids = db.session.query(Embedding.id).filter(Embedding.created_at < thirty_days_ago) \
-                .order_by(Embedding.created_at.desc()).limit(100).all()
+            embedding_ids = (
+                db.session.query(Embedding.id)
+                .filter(Embedding.created_at < thirty_days_ago)
+                .order_by(Embedding.created_at.desc())
+                .limit(100)
+                .all()
+            )
             embedding_ids = [embedding_id[0] for embedding_id in embedding_ids]
         except NotFound:
             break
         if embedding_ids:
             for embedding_id in embedding_ids:
-                db.session.execute(text(
-                    "DELETE FROM embeddings WHERE id = :embedding_id"
-                ), {'embedding_id': embedding_id})
+                db.session.execute(
+                    text("DELETE FROM embeddings WHERE id = :embedding_id"), {"embedding_id": embedding_id}
+                )
             db.session.commit()
         else:
             break
     end_at = time.perf_counter()
-    click.echo(click.style('Cleaned embedding cache from db success latency: {}'.format(end_at - start_at), fg='green'))
+    click.echo(click.style("Cleaned embedding cache from db success latency: {}".format(end_at - start_at), fg="green"))
diff --git a/api/schedule/clean_unused_datasets_task.py b/api/schedule/clean_unused_datasets_task.py
index b2b2f82b786f5..3d799bfd4ef73 100644
--- a/api/schedule/clean_unused_datasets_task.py
+++ b/api/schedule/clean_unused_datasets_task.py
@@ -12,9 +12,9 @@
 from models.dataset import Dataset, DatasetQuery, Document

-@app.celery.task(queue='dataset')
+@app.celery.task(queue="dataset")
 def clean_unused_datasets_task():
-    click.echo(click.style('Start clean unused datasets indexes.', fg='green'))
+    click.echo(click.style("Start clean unused datasets indexes.", fg="green"))
     clean_days = dify_config.CLEAN_DAY_SETTING
     start_at = time.perf_counter()
     thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)
@@ -22,40 +22,44 @@ def clean_unused_datasets_task():
     while True:
         try:
             # Subquery for counting new documents
-            document_subquery_new = db.session.query(
-                Document.dataset_id,
-                func.count(Document.id).label('document_count')
-            ).filter(
-                Document.indexing_status == 'completed',
-                Document.enabled == True,
-                Document.archived == False,
-                Document.updated_at > thirty_days_ago
-            ).group_by(Document.dataset_id).subquery()
+            document_subquery_new = (
+                db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
+                .filter(
+                    Document.indexing_status == "completed",
+                    Document.enabled == True,
+                    Document.archived == False,
+                    Document.updated_at > thirty_days_ago,
+                )
+                .group_by(Document.dataset_id)
+                .subquery()
+            )

             # Subquery for counting old documents
-            document_subquery_old = db.session.query(
-                Document.dataset_id,
-                func.count(Document.id).label('document_count')
-            ).filter(
-                Document.indexing_status == 'completed',
-                Document.enabled == True,
-                Document.archived == False,
-                Document.updated_at < thirty_days_ago
-            ).group_by(Document.dataset_id).subquery()
+            document_subquery_old = (
+                db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
+                .filter(
+                    Document.indexing_status == "completed",
+                    Document.enabled == True,
+                    Document.archived == False,
+                    Document.updated_at < thirty_days_ago,
+                )
+                .group_by(Document.dataset_id)
+                .subquery()
+            )

             # Main query with join and filter
-            datasets = (db.session.query(Dataset)
-                        .outerjoin(
-                document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id
-            ).outerjoin(
-                document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id
-            ).filter(
-                Dataset.created_at < thirty_days_ago,
-                func.coalesce(document_subquery_new.c.document_count, 0) == 0,
-                func.coalesce(document_subquery_old.c.document_count, 0) > 0
-            ).order_by(
-                Dataset.created_at.desc()
-            ).paginate(page=page, per_page=50))
+            datasets = (
+                db.session.query(Dataset)
+                .outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
+                .outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id)
+                .filter(
+                    Dataset.created_at < thirty_days_ago,
+                    func.coalesce(document_subquery_new.c.document_count, 0) == 0,
+                    func.coalesce(document_subquery_old.c.document_count, 0) > 0,
+                )
+                .order_by(Dataset.created_at.desc())
+                .paginate(page=page, per_page=50)
+            )

         except NotFound:
             break
@@ -63,10 +67,11 @@ def clean_unused_datasets_task():
             break
         page += 1
         for dataset in datasets:
-            dataset_query = db.session.query(DatasetQuery).filter(
-                DatasetQuery.created_at > thirty_days_ago,
-                DatasetQuery.dataset_id == dataset.id
-            ).all()
+            dataset_query = (
+                db.session.query(DatasetQuery)
+                .filter(DatasetQuery.created_at > thirty_days_ago, DatasetQuery.dataset_id == dataset.id)
+                .all()
+            )
             if not dataset_query or len(dataset_query) == 0:
                 try:
                     # remove index
@@ -74,17 +79,14 @@ def clean_unused_datasets_task():
                     index_processor.clean(dataset, None)

                     # update document
-                    update_params = {
-                        Document.enabled: False
-                    }
+                    update_params = {Document.enabled: False}

                     Document.query.filter_by(dataset_id=dataset.id).update(update_params)
                     db.session.commit()
-                    click.echo(click.style('Cleaned unused dataset {} from db success!'.format(dataset.id),
-                               fg='green'))
+                    click.echo(click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green"))
                 except Exception as e:
                     click.echo(
-                        click.style('clean dataset index error: {} {}'.format(e.__class__.__name__, str(e)),
-                                    fg='red'))
+                        click.style("clean dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red")
+                    )
     end_at = time.perf_counter()
-    click.echo(click.style('Cleaned unused dataset from db success latency: {}'.format(end_at - start_at), fg='green'))
+    click.echo(click.style("Cleaned unused dataset from db success latency: {}".format(end_at - start_at), fg="green"))
diff --git a/api/services/app_dsl_service.py b/api/services/app_dsl_service.py
index 838585c575fdd..bfb160b3e476d 100644
--- a/api/services/app_dsl_service.py
+++ b/api/services/app_dsl_service.py
@@ -285,6 +285,8 @@ def _import_and_overwrite_workflow_based_app(cls,
         # sync draft workflow
         environment_variables_list = workflow_data.get('environment_variables') or []
         environment_variables = [factory.build_variable_from_mapping(obj) for obj in environment_variables_list]
+        conversation_variables_list = workflow_data.get('conversation_variables') or []
+        conversation_variables = [factory.build_variable_from_mapping(obj) for obj in conversation_variables_list]
         draft_workflow = workflow_service.sync_draft_workflow(
             app_model=app_model,
             graph=workflow_data.get('graph', {}),
@@ -292,6 +294,7 @@ def _import_and_overwrite_workflow_based_app(cls,
             unique_hash=unique_hash,
             account=account,
             environment_variables=environment_variables,
+            conversation_variables=conversation_variables,
         )

         return draft_workflow
diff --git a/api/tests/integration_tests/workflow/nodes/test_llm.py b/api/tests/integration_tests/workflow/nodes/test_llm.py
index ac704e4eaf54d..4686ce06752ed 100644
--- a/api/tests/integration_tests/workflow/nodes/test_llm.py
+++ b/api/tests/integration_tests/workflow/nodes/test_llm.py
@@ -10,8 +10,8 @@
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.model_entities import ModelType
 from core.model_runtime.model_providers import ModelProviderFactory
-from core.workflow.entities.node_entities import SystemVariable
 from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.enums import SystemVariable
 from core.workflow.nodes.base_node import UserFrom
 from core.workflow.nodes.llm.llm_node import LLMNode
 from extensions.ext_database import db
@@ -236,4 +236,4 @@

     assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED
     assert 'sunny' in json.dumps(result.process_data)
-    assert 'what\'s the weather today?' in json.dumps(result.process_data)
\ No newline at end of file
+    assert 'what\'s the weather today?' in json.dumps(result.process_data)
diff --git a/api/tests/integration_tests/workflow/nodes/test_parameter_extractor.py b/api/tests/integration_tests/workflow/nodes/test_parameter_extractor.py
index 312ad47026beb..adf5ffe3cadf7 100644
--- a/api/tests/integration_tests/workflow/nodes/test_parameter_extractor.py
+++ b/api/tests/integration_tests/workflow/nodes/test_parameter_extractor.py
@@ -12,8 +12,8 @@
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.model_entities import ModelType
 from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory
-from core.workflow.entities.node_entities import SystemVariable
 from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.enums import SystemVariable
 from core.workflow.nodes.base_node import UserFrom
 from core.workflow.nodes.parameter_extractor.parameter_extractor_node import ParameterExtractorNode
 from extensions.ext_database import db
@@ -363,7 +363,7 @@ def test_extract_json_response():
     {
       "location": "kawaii"
     }
-    hello world.  
+    hello world.
     """)

     assert result['location'] == 'kawaii'
@@ -445,4 +445,4 @@ def test_chat_parameter_extractor_with_memory(setup_anthropic_mock):

         assert latest_role != prompt.get('role')
         if prompt.get('role') in ['user', 'assistant']:
-            latest_role = prompt.get('role')
\ No newline at end of file
+            latest_role = prompt.get('role')
diff --git a/api/tests/unit_tests/core/app/segments/test_factory.py b/api/tests/unit_tests/core/app/segments/test_factory.py
index a8429b9c1b1cc..afd0fa50b590f 100644
--- a/api/tests/unit_tests/core/app/segments/test_factory.py
+++ b/api/tests/unit_tests/core/app/segments/test_factory.py
@@ -3,12 +3,9 @@
 import pytest

 from core.app.segments import (
-    ArrayFileVariable,
     ArrayNumberVariable,
     ArrayObjectVariable,
     ArrayStringVariable,
-    FileSegment,
-    FileVariable,
     FloatVariable,
     IntegerVariable,
     ObjectSegment,
@@ -149,83 +146,6 @@ def test_array_object_variable():
     assert isinstance(variable.value[1]['key2'], int)

-def test_file_variable():
-    mapping = {
-        'id': str(uuid4()),
-        'value_type': 'file',
-        'name': 'test_file',
-        'description': 'Description of the variable.',
-        'value': {
-            'id': str(uuid4()),
-            'tenant_id': 'tenant_id',
-            'type': 'image',
-            'transfer_method': 'local_file',
-            'url': 'url',
-            'related_id': 'related_id',
-            'extra_config': {
-                'image_config': {
-                    'width': 100,
-                    'height': 100,
-                },
-            },
-            'filename': 'filename',
-            'extension': 'extension',
-            'mime_type': 'mime_type',
-        },
-    }
-    variable = factory.build_variable_from_mapping(mapping)
-    assert isinstance(variable, FileVariable)
-
-
-def test_array_file_variable():
-    mapping = {
-        'id': str(uuid4()),
-        'value_type': 'array[file]',
-        'name': 'test_array_file',
-        'description': 'Description of the variable.',
-        'value': [
-            {
-                'id': str(uuid4()),
-                'tenant_id': 'tenant_id',
-                'type': 'image',
-                'transfer_method': 'local_file',
-                'url': 'url',
-                'related_id': 'related_id',
-                'extra_config': {
-                    'image_config': {
-                        'width': 100,
-                        'height': 100,
-                    },
-                },
-                'filename': 'filename',
-                'extension': 'extension',
-                'mime_type': 'mime_type',
-            },
-            {
-                'id': str(uuid4()),
-                'tenant_id': 'tenant_id',
-                'type': 'image',
-                'transfer_method': 'local_file',
-                'url': 'url',
-                'related_id': 'related_id',
-                'extra_config': {
-                    'image_config': {
-                        'width': 100,
-                        'height': 100,
-                    },
-                },
-                'filename': 'filename',
-                'extension': 'extension',
-                'mime_type': 'mime_type',
-            },
-        ],
-    }
-    variable = factory.build_variable_from_mapping(mapping)
-    assert isinstance(variable, ArrayFileVariable)
-    assert isinstance(variable.value[0], FileSegment)
-    assert isinstance(variable.value[1], FileSegment)
-
-
 def test_variable_cannot_large_than_5_kb():
     with pytest.raises(VariableError):
         factory.build_variable_from_mapping(
diff --git a/api/tests/unit_tests/core/app/segments/test_segment.py b/api/tests/unit_tests/core/app/segments/test_segment.py
index 414404b7d0362..7e3e69ffbfc45 100644
--- a/api/tests/unit_tests/core/app/segments/test_segment.py
+++ b/api/tests/unit_tests/core/app/segments/test_segment.py
@@ -1,7 +1,7 @@
 from core.app.segments import SecretVariable, StringSegment, parser
 from core.helper import encrypter
-from core.workflow.entities.node_entities import SystemVariable
 from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.enums import SystemVariable

 def test_segment_group_to_text():
diff --git a/api/tests/unit_tests/core/workflow/nodes/test_answer.py b/api/tests/unit_tests/core/workflow/nodes/test_answer.py
index 3a32829e373c2..4617b6a42f8ec 100644
--- a/api/tests/unit_tests/core/workflow/nodes/test_answer.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_answer.py
@@ -1,8 +1,8 @@
 from unittest.mock import MagicMock

 from core.app.entities.app_invoke_entities import InvokeFrom
-from core.workflow.entities.node_entities import SystemVariable
 from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.enums import SystemVariable
 from core.workflow.nodes.answer.answer_node import AnswerNode
 from core.workflow.nodes.base_node import UserFrom
 from extensions.ext_database import db
diff --git a/api/tests/unit_tests/core/workflow/nodes/test_if_else.py b/api/tests/unit_tests/core/workflow/nodes/test_if_else.py
index 4662c5ff2b26d..d21b7785c4f4a 100644
--- a/api/tests/unit_tests/core/workflow/nodes/test_if_else.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_if_else.py
@@ -1,8 +1,8 @@
 from unittest.mock import MagicMock

 from core.app.entities.app_invoke_entities import InvokeFrom
-from core.workflow.entities.node_entities import SystemVariable
 from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.enums import SystemVariable
 from core.workflow.nodes.base_node import UserFrom
 from core.workflow.nodes.if_else.if_else_node import IfElseNode
 from extensions.ext_database import db
diff --git a/api/tests/unit_tests/core/workflow/nodes/test_variable_assigner.py b/api/tests/unit_tests/core/workflow/nodes/test_variable_assigner.py
index 8706ba05ceaee..0b37d06fc069b 100644
--- a/api/tests/unit_tests/core/workflow/nodes/test_variable_assigner.py
+++ b/api/tests/unit_tests/core/workflow/nodes/test_variable_assigner.py
@@ -3,8 +3,8 @@
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.app.segments import ArrayStringVariable, StringVariable
-from core.workflow.entities.node_entities import SystemVariable
 from core.workflow.entities.variable_pool import VariablePool
+from core.workflow.enums import SystemVariable
 from core.workflow.nodes.base_node import UserFrom
 from core.workflow.nodes.variable_assigner import VariableAssignerNode, WriteMode
diff --git a/api/tests/unit_tests/utils/position_helper/test_position_helper.py b/api/tests/unit_tests/utils/position_helper/test_position_helper.py
index 22373199043d6..eefe374df0762 100644
--- a/api/tests/unit_tests/utils/position_helper/test_position_helper.py
+++ b/api/tests/unit_tests/utils/position_helper/test_position_helper.py
@@ -2,7 +2,7 @@

 import pytest

-from core.helper.position_helper import get_position_map
+from core.helper.position_helper import get_position_map, sort_and_filter_position_map

 @pytest.fixture
@@ -53,3 +53,47 @@ def test_position_helper_with_all_commented(prepare_empty_commented_positions_ya
         folder_path=prepare_empty_commented_positions_yaml,
         file_name='example_positions_all_commented.yaml')
     assert position_map == {}
+
+
+def test_excluded_position_map(prepare_example_positions_yaml):
+    position_map = get_position_map(
+        folder_path=prepare_example_positions_yaml,
+        file_name='example_positions.yaml'
+    )
+    pin_list = ['forth', 'first']
+    include_list = []
+    exclude_list = ['9999999999999']
+    sorted_filtered_position_map = sort_and_filter_position_map(
+        original_position_map=position_map,
+        pin_list=pin_list,
+        include_list=include_list,
+        exclude_list=exclude_list
+    )
+    assert sorted_filtered_position_map == {
+        'forth': 0,
+        'first': 1,
+        'second': 2,
+        'third': 3,
+    }
+
+
+def test_included_position_map(prepare_example_positions_yaml):
+    position_map = get_position_map(
+        folder_path=prepare_example_positions_yaml,
+        file_name='example_positions.yaml'
+    )
+    pin_list = ['second', 'first']
+    include_list = ['first', 'second', 'third', 'forth']
+    exclude_list = []
+    sorted_filtered_position_map = sort_and_filter_position_map(
+        original_position_map=position_map,
+        pin_list=pin_list,
+        include_list=include_list,
+        exclude_list=exclude_list
+    )
+    assert sorted_filtered_position_map == {
+        'second': 0,
+        'first': 1,
+        'third': 2,
+        'forth': 3,
+    }
diff --git a/dev/reformat b/dev/reformat
index f50ccb04c44ed..ad83e897d978b 100755
--- a/dev/reformat
+++ b/dev/reformat
@@ -11,5 +11,8 @@ fi
 # run ruff linter
 ruff check --fix ./api

+# run ruff formatter
+ruff format ./api
+
 # run dotenv-linter linter
 dotenv-linter ./api/.env.example ./web/.env.example
diff --git a/docker/.env.example b/docker/.env.example
index 6fee8b4b3ca5b..5898d3e62a8d1 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -695,3 +695,22 @@ COMPOSE_PROFILES=${VECTOR_STORE:-weaviate}
 # ------------------------------
 EXPOSE_NGINX_PORT=80
 EXPOSE_NGINX_SSL_PORT=443
+
+# ----------------------------------------------------------------------------
+# ModelProvider & Tool Position Configuration
+# Used to specify the model providers and tools that can be used in the app.
+# ----------------------------------------------------------------------------
+
+# Pin, include, and exclude tools
+# Use comma-separated values with no spaces between items.
+# Example: POSITION_TOOL_PINS=bing,google
+POSITION_TOOL_PINS=
+POSITION_TOOL_INCLUDES=
+POSITION_TOOL_EXCLUDES=
+
+# Pin, include, and exclude model providers
+# Use comma-separated values with no spaces between items.
+# Example: POSITION_PROVIDER_PINS=openai,openllm
+POSITION_PROVIDER_PINS=
+POSITION_PROVIDER_INCLUDES=
+POSITION_PROVIDER_EXCLUDES=
\ No newline at end of file
diff --git a/web/app/components/base/markdown.tsx b/web/app/components/base/markdown.tsx
index 02d00334b6ceb..af4b13ff70dea 100644
--- a/web/app/components/base/markdown.tsx
+++ b/web/app/components/base/markdown.tsx
@@ -14,6 +14,7 @@
 import cn from '@/utils/classnames'
 import CopyBtn from '@/app/components/base/copy-btn'
 import SVGBtn from '@/app/components/base/svg'
 import Flowchart from '@/app/components/base/mermaid'
+import ImageGallery from '@/app/components/base/image-gallery'

 // Available language https://github.com/react-syntax-highlighter/react-syntax-highlighter/blob/master/AVAILABLE_LANGUAGES_HLJS.MD
 const capitalizationLanguageNameMap: Record<string, string> = {
@@ -46,9 +47,9 @@ const getCorrectCapitalizationLanguageName = (language: string) => {
 const preprocessLaTeX = (content: string) => {
   if (typeof content !== 'string')
     return content
-  return content.replace(/\\\[(.*?)\\\]/gs, (_, equation) => `$$${equation}$$`)
-    .replace(/\\\((.*?)\\\)/gs, (_, equation) => `$$${equation}$$`)
-    .replace(/(^|[^\\])\$(.+?)\$/gs, (_, prefix, equation) => `${prefix}$${equation}$`)
+  return content.replace(/\\\[(.*?)\\\]/g, (_, equation) => `$$${equation}$$`)
+    .replace(/\\\((.*?)\\\)/g, (_, equation) => `$$${equation}$$`)
+    .replace(/(^|[^\\])\$(.+?)\$/g, (_, prefix, equation) => `${prefix}$${equation}$`)
 }

@@ -58,12 +59,6 @@ export function PreCode(props: { children: any }) {
   return (
     <pre
       ref={ref}
-      onClick={() => {
-        if (ref.current) {
-          const code = ref.current.innerText
-          // copyToClipboard(code);
-        }
-      }}
     >
       {props.children}
     </pre>
@@ -172,7 +167,7 @@ const CodeBlock: CodeComponent = memo(({ inline, className, children, ...props }
         {children}
       </SyntaxHighlighter>
     )
-  }, [children, className, inline, isSVG, language, languageShowName, match, props])
+  }, [chartData, children, className, inline, isSVG, language, languageShowName, match, props])
 })
 CodeBlock.displayName = 'CodeBlock'

@@ -188,17 +183,9 @@ export function Markdown(props: { content: string; className?: string }) {
       ]}
       components={{
         code: CodeBlock,
-        img({ src, alt, ...props }) {
+        img({ src }) {
           return (
-            // eslint-disable-next-line @next/next/no-img-element
-            <img src={src} alt={alt} {...props} />
+            <ImageGallery srcs={[src]} />
           )
         },
         p: (paragraph) => {
@@ -208,14 +195,7 @@ export function Markdown(props: { content: string; className?: string }) {
           return (
             <>
-              {/* eslint-disable-next-line @next/next/no-img-element */}
-              <img src={image.properties.src} alt={image.properties.alt} />
+              <ImageGallery srcs={[image.properties.src]} />
               <p>{paragraph.children.slice(1)}</p>
             </>
           )
diff --git a/web/app/components/header/account-setting/model-provider-page/model-icon/index.tsx b/web/app/components/header/account-setting/model-provider-page/model-icon/index.tsx
index 347572c755ae1..a22ec16c25228 100644
--- a/web/app/components/header/account-setting/model-provider-page/model-icon/index.tsx
+++ b/web/app/components/header/account-setting/model-provider-page/model-icon/index.tsx
@@ -19,7 +19,7 @@ const ModelIcon: FC = ({
 }) => {
   const language = useLanguage()

-  if (provider?.provider === 'openai' && modelName?.startsWith('gpt-4'))
+  if (provider?.provider === 'openai' && (modelName?.startsWith('gpt-4') || modelName?.includes('4o')))
     return

   if (provider?.icon_small) {
diff --git a/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/parameter-item.tsx b/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/parameter-item.tsx
index 57ea4bdd118fe..eced2a8082bb8 100644
--- a/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/parameter-item.tsx
+++ b/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/parameter-item.tsx
@@ -100,7 +100,7 @@ const ParameterItem: FC = ({
     handleInputChange(v === 1)
   }

-  const handleStringInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
+  const handleStringInputChange = (e: React.ChangeEvent<HTMLInputElement | HTMLTextAreaElement>) => {
     handleInputChange(e.target.value)
   }

@@ -190,6 +190,16 @@ const ParameterItem: FC = ({
       )
     }

+    if (parameterRule.type === 'text') {
+      return (
+