integration modifications
KunalTiwary committed Jan 10, 2024
1 parent 475e717 commit 0880781
Showing 10 changed files with 136 additions and 129 deletions.
@@ -0,0 +1,29 @@
# Generated by Django 3.2.14 on 2024-01-10 07:16

from django.db import migrations, models


class Migration(migrations.Migration):
    dependencies = [
        ("dataset", "0046_auto_20240104_0617"),
    ]

    operations = [
        migrations.AlterField(
            model_name="instruction",
            name="meta_info_language",
            field=models.CharField(
                blank=True,
                choices=[
                    ("1", "English(Any script)"),
                    ("2", "Indic(Indic script)"),
                    ("3", "Indic(Latin script)"),
                    ("4", "Indic/English(Latin script)"),
                ],
                help_text="Language of the instruction",
                max_length=20,
                null=True,
                verbose_name="Meta Info Language",
            ),
        ),
    ]
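
Note: this migration only rewrites field metadata (choices, help text, verbose name) on Instruction.meta_info_language; the stored values are the short numeric codes, and Django resolves the labels at display time. A minimal sketch of that mapping, assuming a configured Django environment (the field value below is illustrative):

from dataset.models import Instruction

# An unsaved instance is enough to exercise the choices machinery.
obj = Instruction(meta_info_language="3")
print(obj.get_meta_info_language_display())  # "Indic(Latin script)"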
11 changes: 9 additions & 2 deletions backend/dataset/models.py
@@ -117,6 +117,13 @@
    ("Urdu", "Urdu"),
]

+LANGUAGE_CHOICES_INSTRUCTIONS = (
+    ("1", "English(Any script)"),
+    ("2", "Indic(Indic script)"),
+    ("3", "Indic(Latin script)"),
+    ("4", "Indic/English(Latin script)"),
+)
+
LLM_CHOICES = (("GPT3.5", "GPT3.5"), ("GPT4", "GPT4"), ("LLAMA2", "LLAMA2"))


@@ -684,7 +691,7 @@ class Instruction(DatasetBase):
    )
    meta_info_language = models.CharField(
        max_length=20,
-        choices=LANGUAGE_CHOICES,
+        choices=LANGUAGE_CHOICES_INSTRUCTIONS,
        verbose_name="Meta Info Language",
        null=True,
        blank=True,
@@ -695,7 +702,7 @@ class Instruction(DatasetBase):
    hint = models.TextField(verbose_name="Hint")

    def __str__(self):
-        return f"{self.id} - {self.instruction}"
+        return f"{self.id} - {self.instruction_data}"


class Interaction(DatasetBase):
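
Taken together, the models.py changes point the field at the new LANGUAGE_CHOICES_INSTRUCTIONS tuple and make __str__ use instruction_data, the text field that actually holds the instruction. A small illustration of the new string form (field values are made up):

from dataset.models import Instruction

obj = Instruction(id=42, instruction_data="Write a haiku about the monsoon.")
print(str(obj))  # "42 - Write a haiku about the monsoon."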
24 changes: 24 additions & 0 deletions backend/projects/migrations/0054_alter_project_project_type.py
@@ -0,0 +1,24 @@
# Generated by Django 3.2.14 on 2024-01-10 03:10

from django.db import migrations, models


class Migration(migrations.Migration):
    dependencies = [
        ("projects", "0053_alter_project_project_type"),
    ]

    operations = [
        migrations.AlterField(
            model_name="project",
            name="project_type",
            field=models.CharField(
                choices=[
                    ("ModelOutputEvaluation", "ModelOutputEvaluation"),
                    ("InstructionDrivenChat", "InstructionDrivenChat"),
                ],
                help_text="Project Type indicating the annotation task",
                max_length=100,
            ),
        ),
    ]
2 changes: 2 additions & 0 deletions backend/projects/project_registry.yaml
@@ -44,6 +44,8 @@ Chat:
        class: Interaction
        save_type: new_record
        fields:
+          copy_from_input:
+            id: instruction_id
          annotations:
            - interactions_json
            - no_of_turns
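
The new copy_from_input mapping pairs a field on the input data item (the Instruction) with a field on the record being created (the Interaction), here id -> instruction_id. The registry-consuming code is not part of this diff, so the helper below is only a hypothetical sketch of how such a mapping could be applied:

def apply_copy_from_input(output_dataset_entry, input_item, new_record):
    # Hypothetical helper, not from the codebase: for each mapping such as
    # {"id": "instruction_id"}, copy input_item.id onto new_record.instruction_id.
    mapping = output_dataset_entry.get("fields", {}).get("copy_from_input", {}) or {}
    for source_field, target_field in mapping.items():
        setattr(new_record, target_field, getattr(input_item, source_field))
    return new_record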
3 changes: 2 additions & 1 deletion backend/projects/tasks.py
@@ -190,7 +190,8 @@ def create_tasks_from_dataitems(items, project):
        data = dataset_models.DatasetBase.objects.get(pk=data_id)

        # Remove data id because it's not needed in task.data
-        del item["id"]
+        if "id" in item:
+            del item["id"]
        task = Task(data=item, project_id=project, input_data=data)
        """
        if is_translation_project or dataset_type1 == "TranslationPair":
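
The guard avoids a KeyError for items that arrive without an "id" key (for example, records produced via copy_from_input); the same effect can be had with dict.pop, as sketched below:

# Equivalent to the guarded delete above: drop "id" if present, do nothing otherwise.
item.pop("id", None)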
98 changes: 47 additions & 51 deletions backend/tasks/views.py
Expand Up @@ -1344,12 +1344,11 @@ def partial_update(self, request, pk=None):
annotation_obj.task.project_id.project_type
== "InstructionDrivenChat"
):
if len(request.data["result"]) > 0:
annotation_obj.result = get_llm_output(
request.data["result"],
annotation_obj.task,
annotation_obj,
)
annotation_obj.result = get_llm_output(
request.data["result"],
annotation_obj.task,
annotation_obj,
)
else:
annotation_obj.result = request.data["result"]
if "annotation_notes" in dict(request.data):
@@ -1407,12 +1406,11 @@ def partial_update(self, request, pk=None):
                    annotation_obj.task.project_id.project_type
                    == "InstructionDrivenChat"
                ):
-                    if len(request.data["result"]) > 0:
-                        request.data["result"] = get_llm_output(
-                            request.data["result"],
-                            annotation_obj.task,
-                            annotation_obj,
-                        )
+                    request.data["result"] = get_llm_output(
+                        request.data["result"],
+                        annotation_obj.task,
+                        annotation_obj,
+                    )
                annotation_response = super().partial_update(request)
                annotation_id = annotation_response.data["id"]
                annotation = Annotation.objects.get(pk=annotation_id)
@@ -1473,12 +1471,11 @@ def partial_update(self, request, pk=None):
                    annotation_obj.task.project_id.project_type
                    == "InstructionDrivenChat"
                ):
-                    if len(request.data["result"]) > 0:
-                        annotation_obj.result = get_llm_output(
-                            request.data["result"],
-                            annotation_obj.task,
-                            annotation_obj,
-                        )
+                    annotation_obj.result = get_llm_output(
+                        request.data["result"],
+                        annotation_obj.task,
+                        annotation_obj,
+                    )
                else:
                    annotation_obj.result = request.data["result"]
                if "review_notes" in dict(request.data):
@@ -1575,12 +1572,11 @@ def partial_update(self, request, pk=None):
                    annotation_obj.task.project_id.project_type
                    == "InstructionDrivenChat"
                ):
-                    if len(request.data["result"]) > 0:
-                        request.data["result"] = get_llm_output(
-                            request.data["result"],
-                            annotation_obj.task,
-                            annotation_obj,
-                        )
+                    request.data["result"] = get_llm_output(
+                        request.data["result"],
+                        annotation_obj.task,
+                        annotation_obj,
+                    )
                annotation_response = super().partial_update(request)
                annotation_id = annotation_response.data["id"]
                annotation = Annotation.objects.get(pk=annotation_id)
@@ -1668,12 +1664,11 @@ def partial_update(self, request, pk=None):
                    annotation_obj.task.project_id.project_type
                    == "InstructionDrivenChat"
                ):
-                    if len(request.data["result"]) > 0:
-                        annotation_obj.result = get_llm_output(
-                            request.data["result"],
-                            annotation_obj.task,
-                            annotation_obj,
-                        )
+                    annotation_obj.result = get_llm_output(
+                        request.data["result"],
+                        annotation_obj.task,
+                        annotation_obj,
+                    )
                else:
                    annotation_obj.result = request.data["result"]
                if "supercheck_notes" in dict(request.data):
@@ -1761,12 +1756,11 @@ def partial_update(self, request, pk=None):
                    annotation_obj.task.project_id.project_type
                    == "InstructionDrivenChat"
                ):
-                    if len(request.data["result"]) > 0:
-                        request.data["result"] = get_llm_output(
-                            request.data["result"],
-                            annotation_obj.task,
-                            annotation_obj,
-                        )
+                    request.data["result"] = get_llm_output(
+                        request.data["result"],
+                        annotation_obj.task,
+                        annotation_obj,
+                    )
                annotation_response = super().partial_update(request)
                annotation_id = annotation_response.data["id"]
                annotation = Annotation.objects.get(pk=annotation_id)
@@ -2080,26 +2074,28 @@ def calculate_bleu_score(self, request):
        )


-def get_llm_output(prompt, task, annotation):
-    ##############
-    user_language = "hin"
+def get_llm_output(prompt, task, annotation, complete_checking=True):
    # CHECKS
    intent = task["data"]["meta_info_intent"]
    domain = task["data"]["meta_info_domain"]
    lang_type = task["data"]["meta_info_language"]
-    if intent and domain:
-        intent_check, domain_check, reason = evaluate_prompt_alignment(
-            prompt, domain, intent
-        )
-    if user_language and lang_type:
-        lang_check = prompt_lang_check(user_language, prompt, lang_type)
-    dup_check, message = duplicate_check(annotation, prompt)
+    ann_result = json.loads(annotation.result)
+    if len(ann_result) == 0 and complete_checking:
+        if intent and domain:
+            intent_check, domain_check, reason = evaluate_prompt_alignment(
+                prompt, domain, intent
+            )
+        if lang_type:
+            lang_check = prompt_lang_check(prompt, lang_type)
+    if len(ann_result) >= 0:
+        dup_check, message = duplicate_check(ann_result, prompt)

    # GET MODEL OUTPUT
    ##############
-    history = json.loads(annotation.result)
+    history = ann_result
    model = task["data"]["meta_info_model"]
-    output = get_model_output(prompt, history, model)
-    existing_result = json.loads(annotation.result)
-    existing_result.append({"prompt": prompt, "output": output})
-    return existing_result
+    output = get_model_output(
+        "You are a very kind and helpful assistant!", prompt, history, model
+    )
+    ann_result.append({"prompt": prompt, "output": output})
+    return ann_result
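
Condensed, the reworked helper decodes the annotation's result once, runs the alignment and language checks only on the first turn (and only when complete_checking is set), checks the new prompt against earlier turns, and appends the prompt/output pair. A standalone sketch of that flow with stubbed checks and model call (the stub names are illustrative, not part of the codebase):

import json

def sketch_llm_turn(prompt, annotation_result_json, complete_checking, run_checks, call_model):
    # run_checks stands in for evaluate_prompt_alignment / prompt_lang_check,
    # call_model for get_model_output; both live elsewhere in the backend.
    history = json.loads(annotation_result_json)
    if len(history) == 0 and complete_checking:
        run_checks(prompt)  # first-turn-only validation
    is_original = all(turn["prompt"] != prompt for turn in history)  # mirrors duplicate_check
    output = call_model(prompt, history)  # produce the assistant reply for this turn
    history.append({"prompt": prompt, "output": output})
    return history, is_original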
57 changes: 0 additions & 57 deletions backend/users/migrations/0031_auto_20231227_1055.py

This file was deleted.

13 changes: 6 additions & 7 deletions backend/utils/llm_checks.py
@@ -71,7 +71,7 @@ def get_lid(text):
    return languages[1]


-def prompt_lang_check(user_lang, prompt, lang_type):
+def prompt_lang_check(prompt, lang_type):
    """
    Checks if the given prompt matches the specified language and script criteria.
@@ -142,9 +142,8 @@ def evaluate_prompt_alignment(prompt, target_domain, target_intent):
    return intent, domain, resp_dict["reason"]


-def duplicate_check(annotation, prompt):
-    existingData = json.loads(annotation.result)
-    if prompt in existingData:
-        return False, "Duplicate prompt"
-    else:
-        return True, "Original prompt"
+def duplicate_check(ann_result, prompt):
+    for r in ann_result:
+        if r["prompt"] == prompt:
+            return False, "Duplicate prompt"
+    return True, "Original prompt"
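
A quick usage sketch of the rewritten duplicate_check against a decoded annotation result (illustrative values, tuple returns as in the definition above):

ann_result = [
    {"prompt": "Summarise this paragraph.", "output": "..."},
    {"prompt": "Translate it to Hindi.", "output": "..."},
]
print(duplicate_check(ann_result, "Translate it to Hindi."))   # (False, "Duplicate prompt")
print(duplicate_check(ann_result, "Now make the tone formal."))  # (True, "Original prompt")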
