diff --git a/backend/dataset/migrations/0047_alter_instruction_meta_info_language.py b/backend/dataset/migrations/0047_alter_instruction_meta_info_language.py new file mode 100644 index 000000000..3a9ea3fe7 --- /dev/null +++ b/backend/dataset/migrations/0047_alter_instruction_meta_info_language.py @@ -0,0 +1,29 @@ +# Generated by Django 3.2.14 on 2024-01-10 07:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("dataset", "0046_auto_20240104_0617"), + ] + + operations = [ + migrations.AlterField( + model_name="instruction", + name="meta_info_language", + field=models.CharField( + blank=True, + choices=[ + ("1", "English(Any script)"), + ("2", "Indic(Indic script)"), + ("3", "Indic(Latin script)"), + ("4", "Indic/English(Latin script)"), + ], + help_text="Language of the instruction", + max_length=20, + null=True, + verbose_name="Meta Info Language", + ), + ), + ] diff --git a/backend/dataset/models.py b/backend/dataset/models.py index a764b88fe..47dfbe153 100644 --- a/backend/dataset/models.py +++ b/backend/dataset/models.py @@ -117,6 +117,13 @@ ("Urdu", "Urdu"), ] +LANGUAGE_CHOICES_INSTRUCTIONS = ( + ("1", "English(Any script)"), + ("2", "Indic(Indic script)"), + ("3", "Indic(Latin script)"), + ("4", "Indic/English(Latin script)"), +) + LLM_CHOICES = (("GPT3.5", "GPT3.5"), ("GPT4", "GPT4"), ("LLAMA2", "LLAMA2")) @@ -684,7 +691,7 @@ class Instruction(DatasetBase): ) meta_info_language = models.CharField( max_length=20, - choices=LANGUAGE_CHOICES, + choices=LANGUAGE_CHOICES_INSTRUCTIONS, verbose_name="Meta Info Language", null=True, blank=True, @@ -695,7 +702,7 @@ class Instruction(DatasetBase): hint = models.TextField(verbose_name="Hint") def __str__(self): - return f"{self.id} - {self.instruction}" + return f"{self.id} - {self.instruction_data}" class Interaction(DatasetBase): diff --git a/backend/projects/migrations/0054_alter_project_project_type.py 
b/backend/projects/migrations/0054_alter_project_project_type.py new file mode 100644 index 000000000..57ae5eb3a --- /dev/null +++ b/backend/projects/migrations/0054_alter_project_project_type.py @@ -0,0 +1,24 @@ +# Generated by Django 3.2.14 on 2024-01-10 03:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("projects", "0053_alter_project_project_type"), + ] + + operations = [ + migrations.AlterField( + model_name="project", + name="project_type", + field=models.CharField( + choices=[ + ("ModelOutputEvaluation", "ModelOutputEvaluation"), + ("InstructionDrivenChat", "InstructionDrivenChat"), + ], + help_text="Project Type indicating the annotation task", + max_length=100, + ), + ), + ] diff --git a/backend/projects/project_registry.yaml b/backend/projects/project_registry.yaml index 1891f81ff..c49f5d7ba 100644 --- a/backend/projects/project_registry.yaml +++ b/backend/projects/project_registry.yaml @@ -44,6 +44,8 @@ Chat: class: Interaction save_type: new_record fields: + copy_from_input: + id: instruction_id annotations: - interactions_json - no_of_turns diff --git a/backend/projects/tasks.py b/backend/projects/tasks.py index c3f0f9821..1ec9ee3be 100644 --- a/backend/projects/tasks.py +++ b/backend/projects/tasks.py @@ -190,7 +190,8 @@ def create_tasks_from_dataitems(items, project): data = dataset_models.DatasetBase.objects.get(pk=data_id) # Remove data id because it's not needed in task.data - del item["id"] + if "id" in item: + del item["id"] task = Task(data=item, project_id=project, input_data=data) """ if is_translation_project or dataset_type1 == "TranslationPair": diff --git a/backend/tasks/views.py b/backend/tasks/views.py index ed9a63b30..b460c38ae 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -1344,12 +1344,11 @@ def partial_update(self, request, pk=None): annotation_obj.task.project_id.project_type == "InstructionDrivenChat" ): - if len(request.data["result"]) > 0: - 
annotation_obj.result = get_llm_output( - request.data["result"], - annotation_obj.task, - annotation_obj, - ) + annotation_obj.result = get_llm_output( + request.data["result"], + annotation_obj.task, + annotation_obj, + ) else: annotation_obj.result = request.data["result"] if "annotation_notes" in dict(request.data): @@ -1407,12 +1406,11 @@ def partial_update(self, request, pk=None): annotation_obj.task.project_id.project_type == "InstructionDrivenChat" ): - if len(request.data["result"]) > 0: - request.data["result"] = get_llm_output( - request.data["result"], - annotation_obj.task, - annotation_obj, - ) + request.data["result"] = get_llm_output( + request.data["result"], + annotation_obj.task, + annotation_obj, + ) annotation_response = super().partial_update(request) annotation_id = annotation_response.data["id"] annotation = Annotation.objects.get(pk=annotation_id) @@ -1473,12 +1471,11 @@ def partial_update(self, request, pk=None): annotation_obj.task.project_id.project_type == "InstructionDrivenChat" ): - if len(request.data["result"]) > 0: - annotation_obj.result = get_llm_output( - request.data["result"], - annotation_obj.task, - annotation_obj, - ) + annotation_obj.result = get_llm_output( + request.data["result"], + annotation_obj.task, + annotation_obj, + ) else: annotation_obj.result = request.data["result"] if "review_notes" in dict(request.data): @@ -1575,12 +1572,11 @@ def partial_update(self, request, pk=None): annotation_obj.task.project_id.project_type == "InstructionDrivenChat" ): - if len(request.data["result"]) > 0: - request.data["result"] = get_llm_output( - request.data["result"], - annotation_obj.task, - annotation_obj, - ) + request.data["result"] = get_llm_output( + request.data["result"], + annotation_obj.task, + annotation_obj, + ) annotation_response = super().partial_update(request) annotation_id = annotation_response.data["id"] annotation = Annotation.objects.get(pk=annotation_id) @@ -1668,12 +1664,11 @@ def partial_update(self, 
request, pk=None): annotation_obj.task.project_id.project_type == "InstructionDrivenChat" ): - if len(request.data["result"]) > 0: - annotation_obj.result = get_llm_output( - request.data["result"], - annotation_obj.task, - annotation_obj, - ) + annotation_obj.result = get_llm_output( + request.data["result"], + annotation_obj.task, + annotation_obj, + ) else: annotation_obj.result = request.data["result"] if "supercheck_notes" in dict(request.data): @@ -1761,12 +1756,11 @@ def partial_update(self, request, pk=None): annotation_obj.task.project_id.project_type == "InstructionDrivenChat" ): - if len(request.data["result"]) > 0: - request.data["result"] = get_llm_output( - request.data["result"], - annotation_obj.task, - annotation_obj, - ) + request.data["result"] = get_llm_output( + request.data["result"], + annotation_obj.task, + annotation_obj, + ) annotation_response = super().partial_update(request) annotation_id = annotation_response.data["id"] annotation = Annotation.objects.get(pk=annotation_id) @@ -2080,26 +2074,28 @@ def calculate_bleu_score(self, request): ) -def get_llm_output(prompt, task, annotation): - ############## - user_language = "hin" +def get_llm_output(prompt, task, annotation, complete_checking=True): # CHECKS intent = task["data"]["meta_info_intent"] domain = task["data"]["meta_info_domain"] lang_type = task["data"]["meta_info_language"] - if intent and domain: - intent_check, domain_check, reason = evaluate_prompt_alignment( - prompt, domain, intent - ) - if user_language and lang_type: - lang_check = prompt_lang_check(user_language, prompt, lang_type) - dup_check, message = duplicate_check(annotation, prompt) + ann_result = json.loads(annotation.result) + if len(ann_result) == 0 and complete_checking: + if intent and domain: + intent_check, domain_check, reason = evaluate_prompt_alignment( + prompt, domain, intent + ) + if lang_type: + lang_check = prompt_lang_check(prompt, lang_type) + if len(ann_result) >= 0: + dup_check, message = 
duplicate_check(ann_result, prompt) # GET MODEL OUTPUT - ############## - history = json.loads(annotation.result) + history = ann_result model = task["data"]["meta_info_model"] - output = get_model_output(prompt, history, model) - existing_result = json.loads(annotation.result) - existing_result.append({"prompt": prompt, "output": output}) - return existing_result + output = get_model_output( + "You are a very kind and helpful assistant!", prompt, history, model + ) + ann_result.append({"prompt": prompt, "output": output}) + return ann_result diff --git a/backend/users/migrations/0031_auto_20231227_1055.py b/backend/users/migrations/0031_auto_20231227_1055.py deleted file mode 100644 index e5fc2d684..000000000 --- a/backend/users/migrations/0031_auto_20231227_1055.py +++ /dev/null @@ -1,57 +0,0 @@ -# Generated by Django 3.2.14 on 2023-12-27 10:55 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("users", "0030_alter_user_profile_photo"), - ] - - operations = [ - migrations.AddField( - model_name="user", - name="address", - field=models.TextField(blank=True, verbose_name="address"), - ), - migrations.AddField( - model_name="user", - name="city", - field=models.CharField(blank=True, max_length=255, verbose_name="city"), - ), - migrations.AddField( - model_name="user", - name="date_of_birth", - field=models.DateField(blank=True, null=True, verbose_name="date_of_birth"), - ), - migrations.AddField( - model_name="user", - name="gender", - field=models.CharField( - blank=True, - choices=[("M", "Male"), ("F", "Female"), ("O", "Other")], - max_length=1, - verbose_name="gender", - ), - ), - migrations.AddField( - model_name="user", - name="guest_user", - field=models.BooleanField( - choices=[(True, "Yes"), (False, "No")], - default=False, - help_text="Indicates whether the user is a guest user.", - verbose_name="guest_user", - ), - ), - migrations.AddField( - model_name="user", - name="pin_code", - 
field=models.CharField(blank=True, max_length=10, verbose_name="Pin Code"), - ), - migrations.AddField( - model_name="user", - name="state", - field=models.CharField(blank=True, max_length=255, verbose_name="state"), - ), - ] diff --git a/backend/utils/llm_checks.py b/backend/utils/llm_checks.py index 2d62db2a7..46089cacb 100644 --- a/backend/utils/llm_checks.py +++ b/backend/utils/llm_checks.py @@ -71,7 +71,7 @@ def get_lid(text): return languages[1] -def prompt_lang_check(user_lang, prompt, lang_type): +def prompt_lang_check(prompt, lang_type): """ Checks if the given prompt matches the specified language and script criteria. @@ -142,9 +142,8 @@ def evaluate_prompt_alignment(prompt, target_domain, target_intent): return intent, domain, resp_dict["reason"] -def duplicate_check(annotation, prompt): - existingData = json.loads(annotation.result) - if prompt in existingData: - return False, "Duplicate prompt" - else: - return True, "Original prompt" +def duplicate_check(ann_result, prompt): + for r in ann_result: + if r["prompt"] == prompt: + return False, "Duplicate prompt" + return True, "Original prompt" diff --git a/backend/utils/llm_interactions.py b/backend/utils/llm_interactions.py index b81ed8310..d78dd8388 100644 --- a/backend/utils/llm_interactions.py +++ b/backend/utils/llm_interactions.py @@ -55,15 +55,18 @@ def process_history(history): return messages -def get_gpt4_output(prompt, history): +def get_gpt4_output(system_prompt, user_prompt, history): openai.api_type = os.getenv("LLM_INTERACTIONS_OPENAI_API_TYPE") openai.api_base = os.getenv("LLM_INTERACTIONS_OPENAI_API_BASE") openai.api_version = os.getenv("LLM_INTERACTIONS_OPENAI_API_VERSION") openai.api_key = os.getenv("LLM_INTERACTIONS_OPENAI_API_KEY") engine = "prompt-chat-gpt4" - messages = process_history(history) - messages.append({"role": "user", "content": prompt}) + history = process_history(history) + messages = [{"role": "system", "content": system_prompt}] + messages.extend(history) + messages.append({"role": "user", "content": 
user_prompt}) + response = openai.ChatCompletion.create( engine=engine, messages=messages, @@ -78,15 +81,18 @@ def get_gpt4_output(prompt, history): return response["choices"][0]["message"]["content"].strip() -def get_gpt3_output(prompt, history): +def get_gpt3_output(system_prompt, user_prompt, history): openai.api_type = os.getenv("LLM_INTERACTIONS_OPENAI_API_TYPE") openai.api_base = os.getenv("LLM_INTERACTIONS_OPENAI_API_BASE") openai.api_version = os.getenv("LLM_INTERACTIONS_OPENAI_API_VERSION") openai.api_key = os.getenv("LLM_INTERACTIONS_OPENAI_API_KEY") engine = "prompt-chat-gpt35" - messages = process_history(history) - messages.append({"role": "user", "content": prompt}) + history = process_history(history) + messages = [{"role": "system", "content": system_prompt}] + messages.extend(history) + messages.append({"role": "user", "content": user_prompt}) + response = openai.ChatCompletion.create( engine=engine, messages=messages, @@ -123,13 +129,13 @@ def get_llama2_output(system_prompt, conv_history, user_prompt): return result.json()["choices"][0]["message"]["content"].strip() -def get_model_output(prompt, history, model="gpt3.5"): +def get_model_output(system_prompt, user_prompt, history, model="gpt3.5"): # Assume that translation happens outside (and the prompt is already translated) out = "" if model == "gpt3.5": - out = get_gpt3_output(prompt, history) + out = get_gpt3_output(system_prompt, user_prompt, history) elif model == "gpt4": - out = get_gpt4_output(prompt, history) + out = get_gpt4_output(system_prompt, user_prompt, history) elif model == "llama2": - out = get_llama2_output("", history, prompt) + out = get_llama2_output(system_prompt, history, user_prompt) return out diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index b4c2da68c..0e907bafe 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -899,7 +899,7 @@ def user_analytics(self, request, pk=None): workspace_reviewer_list.extend(reviewer_ids) 
workspace_reviewer_list = list(set(workspace_reviewer_list)) - if user_id not in workspace_superchecker_list: + if user_id not in workspace_reviewer_list: final_response = { "message": "You do not have enough permissions to access this view!" }