From 5e93e4781160407b806097709bc4783d6cb5dc4d Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 3 Oct 2024 14:44:16 +0200 Subject: [PATCH 1/3] refactor: Settings management workflow - fix: settings.schema is not cached anymore - feat: Allow to update and delete questions - fix: properly remove settings with .remove() --- argilla/src/argilla/settings/_resource.py | 60 ++++++++++++++++++++--- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/argilla/src/argilla/settings/_resource.py b/argilla/src/argilla/settings/_resource.py index 2ff15801d2..5fc89ae9e3 100644 --- a/argilla/src/argilla/settings/_resource.py +++ b/argilla/src/argilla/settings/_resource.py @@ -15,7 +15,6 @@ import json import os import re -from functools import cached_property from pathlib import Path from typing import List, Optional, TYPE_CHECKING, Dict, Union, Iterator, Sequence, Literal from uuid import UUID @@ -160,7 +159,8 @@ def dataset(self, dataset: "Dataset"): self._dataset = dataset self._client = dataset._client - @cached_property + # TODO: The schema should be refreshed after a change on the settings names + @property def schema(self) -> dict: schema_dict = {} @@ -178,7 +178,7 @@ def schema(self) -> dict: return schema_dict - @cached_property + @property def schema_by_id(self) -> Dict[UUID, Union[Field, QuestionType, MetadataType, VectorField]]: return {v.id: v for v in self.schema.values()} @@ -219,7 +219,7 @@ def update(self) -> "Resource": self.__fields.update() self.__vectors.update() self.__metadata.update() - # self.questions.update() + self.questions.update() self._update_last_api_call() return self @@ -385,9 +385,8 @@ def _update_dataset_related_attributes(self): self._client.api.datasets.update(dataset_model) def _validate_empty_settings(self): - if not all([self.fields, self.questions]): - message = "Fields and questions are required" - raise SettingsError(message=message) + if not all(self.fields): + raise SettingsError(message="At least one field must be defined in the settings") def _validate_duplicate_names(self) -> None: dataset_properties_by_name = {} @@ -488,6 +487,16 @@ def add(self, property: Property) -> Property: setattr(self, property.name, property) return property + def remove(self, property: Union[str, Property]) -> None: + if isinstance(property, str): + property = self._properties_by_name.get(property) + + if property is None: + return + + property = self._properties_by_name.pop(property.name) + property.delete() + def create(self): for property in self: try: @@ -530,6 +539,17 @@ class to work with questions as we do with fields, vectors, or metadata (special Once issue https://github.com/argilla-io/argilla/issues/4931 is tackled, this class should be removed. """ + def remove(self, property: Union[str, Property]) -> None: + if isinstance(property, str): + property = self._properties_by_name.get(property) + + if property is None: + return + + property = self._properties_by_name.pop(property.name) + self._delete_question(property) + + # TODO: Align to the Resource model def create(self): for question in self: try: @@ -537,9 +557,33 @@ def create(self): except ArgillaAPIError as e: raise SettingsError(f"Failed to create question {question.name}") from e - def _create_question(self, question: QuestionType) -> None: + # TODO: Align to the Resource model + def update(self): + for item in self: + try: + item.dataset = self._settings.dataset + self._update_question(item) if item.id else self._create_question(item) + except ArgillaAPIError as e: + raise SettingsError(f"Failed to update {item.name!r}: {e.message}") from e + + # TODO: Align to the Resource model + def _create_question(self, question: QuestionType) -> QuestionType: question_model = self._settings._client.api.questions.create( dataset_id=self._settings.dataset.id, question=question.api_model(), ) question._model = question_model + + return question + + # TODO: Align to the Resource model + def _update_question(self, question: QuestionType) -> QuestionType: + question_model = self._settings._client.api.questions.update(question.api_model()) + question._model = question_model + + return question + + # TODO: Align to the Resource model + def _delete_question(self, question: QuestionType) -> None: + question_model = self._settings._client.api.questions.delete(question.id) + question._model = question_model From 989cd69bed56b67a963f9cea066de70739332ab5 Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 3 Oct 2024 14:46:40 +0200 Subject: [PATCH 2/3] refactor: Allow create dataset without publishing and expose publish method --- argilla/src/argilla/datasets/_resource.py | 32 ++++++++++++++++------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/argilla/src/argilla/datasets/_resource.py b/argilla/src/argilla/datasets/_resource.py index 7637d90da0..484f4ab4f7 100644 --- a/argilla/src/argilla/datasets/_resource.py +++ b/argilla/src/argilla/datasets/_resource.py @@ -151,15 +151,24 @@ def get(self) -> "Dataset": self.settings.get() return self - def create(self) -> "Dataset": + def create(self, publish: bool = True) -> "Dataset": """Creates the dataset on the server with the `Settings` configuration. + Parameters: + publish (bool): If True, the dataset is published after creation. Default is True. + Returns: Dataset: The created dataset object. + """ - super().create() try: - return self._publish() + super().create() + self._settings.create() + + if publish: + return self.publish() + + return self.get() except Exception as e: self._log_message(message=f"Error creating dataset: {e}", level="error") self._rollback_dataset_creation() @@ -174,6 +183,17 @@ def update(self) -> "Dataset": self.settings.update() return self + def publish(self) -> "Dataset": + """ + Publishes the dataset on the server. + + Returns: + The published dataset object. + + """ + self._api.publish(dataset_id=self._model.id) + return self.get() + def progress(self, with_users_distribution: bool = False) -> dict: """Returns the team's progress on the dataset. @@ -236,12 +256,6 @@ def api_model(self) -> DatasetModel: self._model.workspace_id = self.workspace.id return self._model - def _publish(self) -> "Dataset": - self._settings.create() - # self._api.publish(dataset_id=self._model.id) - - return self.get() - def _resolve_workspace(self) -> Workspace: workspace = self._workspace From e3a31f1cb063662657cc17e5e2be3b26030f6eec Mon Sep 17 00:00:00 2001 From: Francisco Aranda Date: Thu, 3 Oct 2024 15:15:04 +0200 Subject: [PATCH 3/3] chore: Mock method instead of NotImplemented --- argilla/src/argilla/_api/_questions.py | 4 ++-- argilla/src/argilla/settings/_resource.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/argilla/src/argilla/_api/_questions.py b/argilla/src/argilla/_api/_questions.py index 5b112bc76f..2444f9a465 100644 --- a/argilla/src/argilla/_api/_questions.py +++ b/argilla/src/argilla/_api/_questions.py @@ -70,12 +70,12 @@ def update( question: QuestionModel, ) -> QuestionModel: # TODO: Implement update method for fields with server side ID - raise NotImplementedError + return question @api_error_handler def delete(self, question_id: UUID) -> None: # TODO: Implement delete method for fields with server side ID - raise NotImplementedError + pass #################### # Utility methods # diff --git a/argilla/src/argilla/settings/_resource.py b/argilla/src/argilla/settings/_resource.py index 5fc89ae9e3..267c7533b2 100644 --- a/argilla/src/argilla/settings/_resource.py +++ b/argilla/src/argilla/settings/_resource.py @@ -585,5 +585,4 @@ def _update_question(self, question: QuestionType) -> QuestionType: # TODO: Align to the Resource model def _delete_question(self, question: QuestionType) -> None: - question_model = self._settings._client.api.questions.delete(question.id) - question._model = question_model + self._settings._client.api.questions.delete(question.id)