diff --git a/backend/dataset/admin.py b/backend/dataset/admin.py index bd1b5cf18..f07224949 100644 --- a/backend/dataset/admin.py +++ b/backend/dataset/admin.py @@ -51,6 +51,16 @@ class PromptAnswerEvaluationAdmin(ImportExportActionModelAdmin): resource_class = PromptAnswerEvaluationResource +# Custom admin class for Instructions model +class InstructionsAdmin(ImportExportActionModelAdmin): + resource_class = InstructionsResource + + +# Custom admin class for Interactions model +class InteractionsAdmin(ImportExportActionModelAdmin): + resource_class = InteractionsResource + + admin.site.register(SentenceText, SentenceTextAdmin) admin.site.register(TranslationPair, TranslationPairAdmin) admin.site.register(OCRDocument, OCRDocumentAdmin) @@ -60,3 +70,5 @@ class PromptAnswerEvaluationAdmin(ImportExportActionModelAdmin): admin.site.register(PromptBase, PromptBaseAdmin) admin.site.register(PromptAnswer, PromptAnswerAdmin) admin.site.register(PromptAnswerEvaluation, PromptAnswerEvaluationAdmin) +admin.site.register(Instruction, InstructionsAdmin) +admin.site.register(Interaction, InteractionsAdmin) diff --git a/backend/dataset/migrations/0046_auto_20240104_0617.py b/backend/dataset/migrations/0046_auto_20240104_0617.py new file mode 100644 index 000000000..f2dd496cc --- /dev/null +++ b/backend/dataset/migrations/0046_auto_20240104_0617.py @@ -0,0 +1,168 @@ +# Generated by Django 3.2.14 on 2024-01-04 06:17 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [ + ("tasks", "0048_alter_annotation_unique_together"), + ("dataset", "0045_promptanswer_promptanswerevaluation_promptbase"), + ] + + operations = [ + migrations.CreateModel( + name="Instruction", + fields=[ + ( + "datasetbase_ptr", + models.OneToOneField( + auto_created=True, + on_delete=django.db.models.deletion.CASCADE, + parent_link=True, + primary_key=True, + serialize=False, + to="dataset.datasetbase", + ), + ), + ( + "meta_info_model", + models.CharField( + blank=True, + help_text="Model information for the instruction", + max_length=255, + null=True, + verbose_name="Meta Info Model", + ), + ), + ( + "meta_info_auto_generated", + models.BooleanField( + blank=True, + help_text="Whether the instruction has been auto-generated", + null=True, + verbose_name="Meta Info Auto Generated", + ), + ), + ( + "meta_info_intent", + models.CharField( + blank=True, + help_text="Intent information for the instruction", + max_length=255, + null=True, + verbose_name="Meta Info Intent", + ), + ), + ( + "meta_info_domain", + models.CharField( + blank=True, + help_text="Domain information for the instruction", + max_length=255, + null=True, + verbose_name="Meta Info Domain", + ), + ), + ( + "meta_info_structure", + models.CharField( + blank=True, + help_text="Structure information for the instruction", + max_length=255, + null=True, + verbose_name="Meta Info Structure", + ), + ), + ( + "meta_info_language", + models.CharField( + blank=True, + choices=[ + ("English", "English"), + ("Assamese", "Assamese"), + ("Bengali", "Bengali"), + ("Bodo", "Bodo"), + ("Dogri", "Dogri"), + ("Gujarati", "Gujarati"), + ("Hindi", "Hindi"), + ("Kannada", "Kannada"), + ("Kashmiri", "Kashmiri"), + ("Konkani", "Konkani"), + ("Maithili", "Maithili"), + ("Malayalam", "Malayalam"), + ("Manipuri", "Manipuri"), + ("Marathi", "Marathi"), + ("Nepali", "Nepali"), + ("Odia", "Odia"), + ("Punjabi", "Punjabi"), + ("Sanskrit", "Sanskrit"), + ("Santali", "Santali"), + ("Sindhi", "Sindhi"), + ("Sinhala", 
"Sinhala"), + ("Tamil", "Tamil"), + ("Telugu", "Telugu"), + ("Urdu", "Urdu"), + ], + help_text="Language of the instruction", + max_length=20, + null=True, + verbose_name="Meta Info Language", + ), + ), + ("instruction_data", models.TextField(verbose_name="Instruction_data")), + ("examples", models.TextField(verbose_name="Examples")), + ("hint", models.TextField(verbose_name="Hint")), + ], + bases=("dataset.datasetbase",), + ), + migrations.AlterField( + model_name="datasetinstance", + name="dataset_type", + field=models.CharField( + choices=[ + ("SentenceText", "SentenceText"), + ("TranslationPair", "TranslationPair"), + ("OCRDocument", "OCRDocument"), + ("BlockText", "BlockText"), + ("Conversation", "Conversation"), + ("SpeechConversation", "SpeechConversation"), + ("PromptBase", "PromptBase"), + ("PromptAnswer", "PromptAnswer"), + ("PromptAnswerEvaluation", "PromptAnswerEvaluation"), + ("Interaction", "Interaction"), + ("Instruction", "Instruction"), + ], + help_text="Dataset Type which is specific for each annotation task", + max_length=100, + verbose_name="dataset_type", + ), + ), + migrations.RenameModel( + old_name="Interactions", + new_name="Interaction", + ), + migrations.AlterField( + model_name="interaction", + name="instruction_id", + field=models.ForeignKey( + help_text="ID of the related instruction", + on_delete=django.db.models.deletion.CASCADE, + to="dataset.instruction", + verbose_name="Instruction ID", + ), + ), + migrations.AlterField( + model_name="promptbase", + name="instruction_id", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + to="dataset.instruction", + ), + ), + migrations.DeleteModel( + name="Instructions", + ), + ] diff --git a/backend/dataset/migrations/0047_alter_instruction_meta_info_language.py b/backend/dataset/migrations/0047_alter_instruction_meta_info_language.py new file mode 100644 index 000000000..3a9ea3fe7 --- /dev/null +++ b/backend/dataset/migrations/0047_alter_instruction_meta_info_language.py @@ -0,0 +1,29 @@ +# Generated by Django 3.2.14 on 2024-01-10 07:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("dataset", "0046_auto_20240104_0617"), + ] + + operations = [ + migrations.AlterField( + model_name="instruction", + name="meta_info_language", + field=models.CharField( + blank=True, + choices=[ + ("1", "English(Any script)"), + ("2", "Indic(Indic script)"), + ("3", "Indic(Latin script)"), + ("4", "Indic/English(Latin script)"), + ], + help_text="Language of the instruction", + max_length=20, + null=True, + verbose_name="Meta Info Language", + ), + ), + ] diff --git a/backend/dataset/models.py b/backend/dataset/models.py index 81d6cd7ea..47dfbe153 100644 --- a/backend/dataset/models.py +++ b/backend/dataset/models.py @@ -18,6 +18,8 @@ ("PromptBase", "PromptBase"), ("PromptAnswer", "PromptAnswer"), ("PromptAnswerEvaluation", "PromptAnswerEvaluation"), + ("Interaction", "Interaction"), + ("Instruction", "Instruction"), ] GENDER_CHOICES = (("M", "Male"), ("F", "Female"), ("O", "Others")) @@ -115,6 +117,13 @@ ("Urdu", "Urdu"), ] +LANGUAGE_CHOICES_INSTRUCTIONS = ( + ("1", "English(Any script)"), + ("2", "Indic(Indic script)"), + ("3", "Indic(Latin script)"), + ("4", "Indic/English(Latin script)"), +) + LLM_CHOICES = (("GPT3.5", "GPT3.5"), ("GPT4", "GPT4"), ("LLAMA2", "LLAMA2")) @@ -641,7 +650,7 @@ def __str__(self): # duration = models.TimeField() -class Instructions(DatasetBase): +class Instruction(DatasetBase): """ Subclass model 
for Instructions
     """
@@ -682,27 +691,27 @@ class Instructions(DatasetBase):
     )
     meta_info_language = models.CharField(
         max_length=20,
-        choices=LANGUAGE_CHOICES,
+        choices=LANGUAGE_CHOICES_INSTRUCTIONS,
         verbose_name="Meta Info Language",
         null=True,
         blank=True,
         help_text="Language of the instruction",
     )
-    instruction = models.TextField(verbose_name="Instruction")
+    instruction_data = models.TextField(verbose_name="Instruction_data")
     examples = models.TextField(verbose_name="Examples")
     hint = models.TextField(verbose_name="Hint")
 
     def __str__(self):
-        return f"{self.id} - {self.instruction}"
+        return f"{self.id} - {self.instruction_data}"
 
 
-class Interactions(DatasetBase):
+class Interaction(DatasetBase):
     """
     Subclass model for Interactions
     """
 
     instruction_id = models.ForeignKey(
-        Instructions,
+        Instruction,
         on_delete=models.CASCADE,
         verbose_name="Instruction ID",
         help_text="ID of the related instruction",
@@ -743,7 +752,7 @@ class PromptBase(DatasetBase):
         help_text=("Prompt of the conversation"),
     )
     instruction_id = models.ForeignKey(
-        Instructions, on_delete=models.CASCADE, null=True, blank=True
+        Instruction, on_delete=models.CASCADE, null=True, blank=True
     )
     language = models.CharField(
         verbose_name="language", choices=LANG_CHOICES, max_length=15
diff --git a/backend/dataset/resources.py b/backend/dataset/resources.py
index 459a68cb9..84bde185d 100644
--- a/backend/dataset/resources.py
+++ b/backend/dataset/resources.py
@@ -144,6 +144,34 @@ class Meta:
         force_init_instance = True
 
 
+class InstructionsResource(ModelResource, ResourceMixin):
+    """
+    Import/Export Resource for Instruction
+    """
+
+    class Meta:
+        import_id_fields = ("id",)
+        exclude = ("datasetbase_ptr",)
+        model = Instruction
+        clean_model_instances = True
+        skip_diff = True
+        force_init_instance = True
+
+
+class InteractionsResource(ModelResource, ResourceMixin):
+    """
+    Import/Export Resource for Interaction
+    """
+
+    class Meta:
+        import_id_fields = ("id",)
+        exclude = ("datasetbase_ptr",)
+        model = Interaction
+        clean_model_instances = True
+        skip_diff = True
+        force_init_instance = True
+
+
 # Define a mapping between dataset instance type and resource
 RESOURCE_MAP = {
     "TranslationPair": TranslationPairResource,
@@ -155,4 +183,6 @@ class Meta:
     "PromptBase": PromptBaseResource,
     "PromptAnswer": PromptAnswerResource,
     "PromptAnswerEvaluation": PromptAnswerEvaluationResource,
+    "Instruction": InstructionsResource,
+    "Interaction": InteractionsResource,
 }
diff --git a/backend/dataset/serializers.py b/backend/dataset/serializers.py
index d36dba8af..15652c9ae 100644
--- a/backend/dataset/serializers.py
+++ b/backend/dataset/serializers.py
@@ -134,6 +134,18 @@ class Meta:
         fields = "__all__"
 
 
+class InstructionsSerializer(serializers.ModelSerializer):
+    class Meta:
+        model = Instruction
+        fields = "__all__"
+
+
+class InteractionsSerializer(serializers.ModelSerializer):
+    class Meta:
+        model = Interaction
+        fields = "__all__"
+
+
 # Define a mapping between dataset instance type and serializer
 SERIALIZER_MAP = {
     "SentenceText": SentenceTextSerializer,
@@ -145,6 +157,8 @@ class Meta:
     "PromptBase": PromptBaseSerializer,
     "PromptAnswer": PromptAnswerSerializer,
     "PromptAnswerEvaluation": PromptAnswerEvaluationSerializer,
+    "Instruction": InstructionsSerializer,
+    "Interaction": InteractionsSerializer,
 }
 
 # class CollectionDatasetSerializer(serializers.ModelSerializer):
diff --git a/backend/dataset/urls.py b/backend/dataset/urls.py
index d50137772..f196312d7 100644
--- a/backend/dataset/urls.py
+++ b/backend/dataset/urls.py
@@ 
-7,6 +7,8 @@ router.register(r"instances", DatasetInstanceViewSet) router.register(r"dataitems", DatasetItemsViewSet) +router.register(r"instructions", InstructionsViewSet, basename="instructions") +router.register(r"interactions", InteractionsViewSet, basename="interactions") # router.register(r"sentences", SentenceTextViewSet) # router.register(r"collection", CollectionDatasetViewSet) # router.register(r"speechcol",SpeechCollectionViewset) diff --git a/backend/dataset/views.py b/backend/dataset/views.py index fc8e9f49b..bebe10779 100644 --- a/backend/dataset/views.py +++ b/backend/dataset/views.py @@ -1414,6 +1414,16 @@ def get(self, request, dataset_type): return Response(dict, status=status.HTTP_200_OK) +class InstructionsViewSet(viewsets.ModelViewSet): + queryset = Instruction.objects.all() + serializer_class = InstructionsSerializer + + +class InteractionsViewSet(viewsets.ModelViewSet): + queryset = Interaction.objects.all() + serializer_class = InteractionsSerializer + + # class SentenceTextViewSet(viewsets.ModelViewSet): # queryset = SentenceText.objects.all() # serializer_class = SentenceTextSerializer diff --git a/backend/deploy/requirements.txt b/backend/deploy/requirements.txt index 8b6c630cb..6f0931906 100644 --- a/backend/deploy/requirements.txt +++ b/backend/deploy/requirements.txt @@ -1,8 +1,11 @@ +aiohttp==3.8.6 +aiosignal==1.3.1 alabaster==0.7.13 amqp==5.1.1 appdirs==1.4.4 asgiref==3.7.2 async-timeout==4.0.3 +asynctest==0.13.0 attr==0.3.1 attrs==23.1.0 azure-core==1.29.2 @@ -29,6 +32,7 @@ coreapi==2.3.3 coreschema==0.0.4 cryptography==41.0.3 defusedxml==0.7.1 +Deprecated==1.2.14 diff-match-patch==20230430 Django==3.2.14 django-annoying==0.10.6 @@ -58,6 +62,8 @@ drf-generators==0.3.0 drf-yasg==1.20.0 et-xmlfile==1.1.0 expiringdict==1.1.4 +frozenlist==1.3.3 +gcloud==0.18.3 google==3.0.0 google-api-core==2.10.0 google-auth==2.11.0 @@ -76,6 +82,7 @@ grpcio==1.57.0 grpcio-status==1.48.2 gunicorn==21.2.0 htmlmin==0.1.12 +httplib2==0.22.0 idna==3.4 imagesize==1.4.1 importlib-metadata==1.7.0 @@ -88,6 +95,7 @@ jiwer==3.0.2 jmespath==0.10.0 joblib==1.3.2 jsonschema==3.2.0 +jwcrypto==1.5.1 kombu==5.2.4 label-studio==1.6.0 label-studio-converter==0.0.44 @@ -100,10 +108,13 @@ MarkupPy==1.14 MarkupSafe==2.1.3 Morfessor==2.0.6 mosestokenizer==1.2.1 +multidict==6.0.4 nltk==3.6.7 numpy==1.21.6 +oauth2client==4.1.3 oauthlib==3.2.2 odfpy==1.4.1 +openai==0.28.1 openapi-codec==1.3.2 openfile==0.0.7 openpyxl==3.1.2 @@ -121,9 +132,11 @@ psycopg2-binary==2.9.1 pyasn1==0.5.0 pyasn1-modules==0.3.0 pycparser==2.21 +pycryptodome==3.19.1 pydantic==1.8.2 Pygments==2.16.1 PyJWT==2.8.0 +pyparsing==3.1.1 Pyrebase4==4.7.1 pyRFC3339==1.1 pyrsistent==0.19.3 @@ -131,6 +144,7 @@ python-crontab==3.0.0 python-dateutil==2.8.1 python-dotenv==0.21.1 python-json-logger==2.0.4 +python-jwt==4.1.0 python3-openid==3.2.0 pytz==2019.3 PyYAML==6.0.1 @@ -139,6 +153,7 @@ redis==5.0.0 regex==2023.8.8 requests==2.27.1 requests-oauthlib==1.3.1 +requests-toolbelt==0.10.1 rq==1.10.1 rsa==4.9 ruamel.yaml==0.17.32 @@ -180,7 +195,9 @@ user-agents==2.2.0 vine==5.0.0 wcwidth==0.2.6 whitenoise==6.5.0 +wrapt==1.16.0 xlrd==2.0.1 xlwt==1.3.0 xmljson==0.2.0 -zipp==3.15.0 +yarl==1.9.4 +zipp==3.15.0 \ No newline at end of file diff --git a/backend/projects/migrations/0053_alter_project_project_type.py b/backend/projects/migrations/0053_alter_project_project_type.py new file mode 100644 index 000000000..cf9b8fac9 --- /dev/null +++ b/backend/projects/migrations/0053_alter_project_project_type.py @@ -0,0 +1,24 @@ +# Generated by Django 3.2.14 
on 2024-01-04 06:17
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("projects", "0052_auto_20240101_1621"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="project",
+            name="project_type",
+            field=models.CharField(
+                choices=[
+                    ("ModelOutputEvaluation", "ModelOutputEvaluation"),
+                    ("Instruction Driven Chat", "Instruction Driven Chat"),
+                ],
+                help_text="Project Type indicating the annotation task",
+                max_length=100,
+            ),
+        ),
+    ]
diff --git a/backend/projects/migrations/0054_alter_project_project_type.py b/backend/projects/migrations/0054_alter_project_project_type.py
new file mode 100644
index 000000000..57ae5eb3a
--- /dev/null
+++ b/backend/projects/migrations/0054_alter_project_project_type.py
@@ -0,0 +1,24 @@
+# Generated by Django 3.2.14 on 2024-01-10 03:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("projects", "0053_alter_project_project_type"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="project",
+            name="project_type",
+            field=models.CharField(
+                choices=[
+                    ("ModelOutputEvaluation", "ModelOutputEvaluation"),
+                    ("InstructionDrivenChat", "InstructionDrivenChat"),
+                ],
+                help_text="Project Type indicating the annotation task",
+                max_length=100,
+            ),
+        ),
+    ]
diff --git a/backend/projects/project_registry.yaml b/backend/projects/project_registry.yaml
index ca9ba4088..c49f5d7ba 100644
--- a/backend/projects/project_registry.yaml
+++ b/backend/projects/project_registry.yaml
@@ -19,4 +19,36 @@ ModelOutputEvaluation:
       annotations:
         - form_output_json
         - output_likert_score
+        - time_taken
+Chat:
+  description: "Instruction Driven Chat"
+  project_types:
+    InstructionDrivenChat:
+      project_mode: "Annotation"
+      input_dataset:
+        class: Instruction
+        fields:
+          - id
+          - meta_info_model
+          - meta_info_intent
+          - meta_info_domain
+          - meta_info_language
+          - instruction_data
+          - examples
+          - hint
+        display_fields:
+          - instruction_data
+          - examples
+          - hint
+      output_dataset:
+        class: Interaction
+        save_type: new_record
+        fields:
+          copy_from_input:
+            id: instruction_id
+          annotations:
+            - interactions_json
+            - no_of_turns
+            - language
+            - model
         - time_taken
\ No newline at end of file
diff --git a/backend/projects/tasks.py b/backend/projects/tasks.py
index c3f0f9821..1ec9ee3be 100644
--- a/backend/projects/tasks.py
+++ b/backend/projects/tasks.py
@@ -190,7 +190,8 @@ def create_tasks_from_dataitems(items, project):
         data = dataset_models.DatasetBase.objects.get(pk=data_id)
 
         # Remove data id because it's not needed in task.data
-        del item["id"]
+        if "id" in item:
+            del item["id"]
         task = Task(data=item, project_id=project, input_data=data)
         """
         if is_translation_project or dataset_type1 == "TranslationPair":
diff --git a/backend/tasks/views.py b/backend/tasks/views.py
index d1684a017..b460c38ae 100644
--- a/backend/tasks/views.py
+++ b/backend/tasks/views.py
@@ -26,6 +26,12 @@
 from utils.convert_result_to_chitralekha_format import (
     convert_result_to_chitralekha_format,
 )
+from utils.llm_checks import (
+    evaluate_prompt_alignment,
+    prompt_lang_check,
+    duplicate_check,
+)
+from utils.llm_interactions import get_model_output
 from utils.search import process_search_query
 
 
@@ -1334,7 +1340,17 @@ def partial_update(self, request, pk=None):
                 == 1,
             )
         else:
-            annotation_obj.result = request.data["result"]
+            if (
+                annotation_obj.task.project_id.project_type
+                == "InstructionDrivenChat"
+            ):
+                annotation_obj.result = get_llm_output(
+                    request.data["result"],
+                    annotation_obj.task,
+                    annotation_obj,
+                )
+            else:
+                annotation_obj.result = request.data["result"]
         if "annotation_notes" in dict(request.data):
             annotation_obj.annotation_notes = request.data["annotation_notes"]
             update_fields_list.append("annotation_notes")
@@ -1386,6 +1402,15 @@ def partial_update(self, request, pk=None):
                 ]
                 == 1,
             )
+            if (
+                annotation_obj.task.project_id.project_type
+                == "InstructionDrivenChat"
+            ):
+                request.data["result"] = get_llm_output(
+                    request.data["result"],
+                    annotation_obj.task,
+                    annotation_obj,
+                )
             annotation_response = super().partial_update(request)
             annotation_id = annotation_response.data["id"]
             annotation = Annotation.objects.get(pk=annotation_id)
@@ -1442,7 +1467,17 @@ def partial_update(self, request, pk=None):
                 <= 2,
             )
         else:
-            annotation_obj.result = request.data["result"]
+            if (
+                annotation_obj.task.project_id.project_type
+                == "InstructionDrivenChat"
+            ):
+                annotation_obj.result = get_llm_output(
+                    request.data["result"],
+                    annotation_obj.task,
+                    annotation_obj,
+                )
+            else:
+                annotation_obj.result = request.data["result"]
         if "review_notes" in dict(request.data):
             annotation_obj.review_notes = request.data["review_notes"]
             update_fields_list.append("review_notes")
@@ -1533,6 +1568,15 @@ def partial_update(self, request, pk=None):
                 ]
                 <= 2,
             )
+            if (
+                annotation_obj.task.project_id.project_type
+                == "InstructionDrivenChat"
+            ):
+                request.data["result"] = get_llm_output(
+                    request.data["result"],
+                    annotation_obj.task,
+                    annotation_obj,
+                )
             annotation_response = super().partial_update(request)
             annotation_id = annotation_response.data["id"]
             annotation = Annotation.objects.get(pk=annotation_id)
@@ -1616,7 +1660,17 @@ def partial_update(self, request, pk=None):
                 <= 3,
             )
         else:
-            annotation_obj.result = request.data["result"]
+            if (
+                annotation_obj.task.project_id.project_type
+                == "InstructionDrivenChat"
+            ):
+                annotation_obj.result = get_llm_output(
+                    request.data["result"],
+                    annotation_obj.task,
+                    annotation_obj,
+                )
+            else:
+                annotation_obj.result = request.data["result"]
         if "supercheck_notes" in dict(request.data):
             annotation_obj.supercheck_notes = request.data["supercheck_notes"]
             update_fields_list.append("supercheck_notes")
@@ -1698,6 +1752,15 @@ def partial_update(self, request, pk=None):
                 ]
                 <= 3,
             )
+            if (
+                annotation_obj.task.project_id.project_type
+                == "InstructionDrivenChat"
+            ):
+                request.data["result"] = get_llm_output(
+                    request.data["result"],
+                    annotation_obj.task,
+                    annotation_obj,
+                )
             annotation_response = super().partial_update(request)
             annotation_id = annotation_response.data["id"]
             annotation = Annotation.objects.get(pk=annotation_id)
@@ -2009,3 +2072,30 @@ def calculate_bleu_score(self, request):
             {"message": "Invalid parameters in request body!"},
             status=status.HTTP_400_BAD_REQUEST,
         )
+
+
+def get_llm_output(prompt, task, annotation, complete_checking=True):
+    # CHECKS
+    # task is a Task model instance, so its payload lives in the JSONField
+    # task.data; complete_checking defaults to True because the call sites
+    # above pass only three arguments.
+    intent = task.data["meta_info_intent"]
+    domain = task.data["meta_info_domain"]
+    lang_type = task.data["meta_info_language"]
+    # annotation.result may already be deserialized (JSONField) or a JSON string
+    ann_result = (
+        json.loads(annotation.result)
+        if isinstance(annotation.result, str)
+        else annotation.result
+    )
+    if len(ann_result) == 0 and complete_checking:
+        if intent and domain:
+            intent_check, domain_check, reason = evaluate_prompt_alignment(
+                prompt, domain, intent
+            )
+        if lang_type:
+            lang_check = prompt_lang_check(prompt, lang_type)
+        dup_check, message = duplicate_check(ann_result, prompt)
+
+    # GET MODEL OUTPUT
+    history = ann_result
+    model = task.data["meta_info_model"]
+    output = get_model_output(
+        "You are a very kind and helpful assistant!", prompt, history, model
+    )
+    ann_result.append({"prompt": prompt, "output": output})
+    return ann_result
diff --git a/backend/utils/llm_checks.py b/backend/utils/llm_checks.py
new file mode 100644
index 000000000..46089cacb
--- /dev/null
+++ b/backend/utils/llm_checks.py
@@ -0,0 +1,149 @@
+import os
+import numpy as np
+import json
+import ast
+import openai
+import requests
+
+
+def get_response_for_domain_and_intent(prompt):
+    openai.api_key = os.getenv("LLM_CHECKS_OPENAI_API_KEY")
+    response = openai.ChatCompletion.create(
+        engine="prompt-chat-gpt35",
+        messages=[{"role": "user", "content": prompt}],
+        temperature=0.1,
+        max_tokens=256,
+        # top_p=1,
+        frequency_penalty=0,
+        presence_penalty=0,
+    )
+
+    return response["choices"][0]["message"]["content"]
+
+
+def get_lid(text):
+    """
+    Determine the language and script of the given text using the IndicLID model.
+    """
+
+    # The inference server URL
+    TRITON_SERVER_URL = os.getenv("LLM_CHECKS_TRITON_SERVER_URL")
+
+    # Authentication header
+    headers = {
+        "Authorization": os.getenv("LLM_CHECKS_TRITON_SERVER_URL_AUTH"),
+        "Content-Type": "application/json",
+    }
+
+    # Prepare the input data
+    input_data = np.array([[text]], dtype=object).tolist()
+
+    # Prepare the request body
+    body = json.dumps(
+        {
+            "inputs": [
+                {
+                    "name": "TEXT",
+                    "shape": [1, 1],
+                    "datatype": "BYTES",
+                    "data": input_data,
+                }
+            ],
+            "outputs": [{"name": "LANGUAGES"}],
+        }
+    )
+
+    # Make the request
+    response = requests.post(TRITON_SERVER_URL, headers=headers, data=body)
+
+    # Check if the request was successful
+    if response.status_code != 200:
+        print("Error during inference request:", response.text)
+        return None
+
+    # Extract results from the response
+    output_data = json.loads(response.text)
+    languages = json.loads(output_data["outputs"][0]["data"][0])
+
+    return languages[1]
+
+
+def prompt_lang_check(prompt, lang_type):
+    """
+    Checks if the given prompt matches the specified language and script criteria.
+
+    Parameters:
+    - prompt (str): Text input to verify.
+    - lang_type (int): Criteria type for language and script checking.
+
+    Returns:
+    - bool: True if criteria are met, False otherwise.
+ """ + + # get detected language and script from IndicLID + + detected_language, detected_script = get_lid(prompt) + + # Type 1 : Prompts in English + + if lang_type == 1: + # Detected language must be english + if detected_language != "eng": + return False + + # Type 2 : Prompts in Indic Language and indic scripts + elif lang_type == 2: + # Detected language must match the user entered language + if detected_language == "eng": + return False + + # Detected script must be Indic script + if detected_script == "Latn": + return False + + # Type 3 : Prompts in Indic Language and latin script (transliterated) + elif lang_type == 3: + # Detected language must match the user entered language + if detected_language == "eng": + return False + + # Detected script must be Latin script + if detected_script != "Latn": + return False + + # Type 4 : Prompts must be english-indic code mixed in latin script + elif lang_type == 4: + # Detected language should be indic or english + if detected_language == "other": + return False + + # Detected script must be latin + if detected_script != "Latn": + return False + + return True + + +def evaluate_prompt_alignment(prompt, target_domain, target_intent): + context = f""" + On a scale of 1 to 5, how well does the statement '{prompt}' align with the intent of '{target_intent}' + and domain of '{target_domain}' (1 - highly unaligned, 5 - perfectly aligned)? + Be very lenient in checking. Output a json string with the keys- intent_score, domain_score, reason + Output: """ + resp_dict = ast.literal_eval(get_response_for_domain_and_intent(context)) + + intent = True if resp_dict["intent_score"] or target_intent is None >= 3 else False + domain = True if resp_dict["domain_score"] or target_domain is None >= 3 else False + + return intent, domain, resp_dict["reason"] + + +def duplicate_check(ann_result, prompt): + for r in ann_result: + if r["prompt"] == prompt: + return False + return True diff --git a/backend/utils/llm_interactions.py b/backend/utils/llm_interactions.py new file mode 100644 index 000000000..d78dd8388 --- /dev/null +++ b/backend/utils/llm_interactions.py @@ -0,0 +1,141 @@ +import os + +# https://pypi.org/project/openai/ +import openai +from django.http import JsonResponse +from transformers import AutoTokenizer, AutoModelForSeq2SeqLM + + +# def generate_response_from_gpt(gpt_prompt): +# messages = [] +# for prompt in gpt_prompt: +# messages.append({"role": "user", "content": prompt}) +# organisation_key = os.getenv("organisation_key") +# openai.api_key = os.getenv("api_key_gpt_3.5") +# client = OpenAI(api_key=openai.api_key, organization=organisation_key) +# response = client.chat.completions.create( +# model="gpt-3.5-turbo", +# messages=messages +# ) +# return response.choices[0].message.content.strip() + + +# import langdetect +# +# def check_language_consistency(texts, target_language): +# """ +# Checks if all paragraphs/sentences in the given text are in the same language. +# +# Args: +# texts (list): A list of paragraphs or sentences to check. +# target_language (str): The language code to check against (e.g., 'en', 'fr', 'es'). +# +# Returns: +# bool: True if all texts are in the target language, False otherwise. 
+# """ +# try: +# detected_languages = set(langdetect.detect(text) for text in texts) +# return len(detected_languages) == 1 and target_language in detected_languages +# except langdetect.lang_detect_exception.LangDetectException: +# return False + + +import os +import openai +import requests + + +def process_history(history): + messages = [] + for turn in history: + user_side = {"role": "user", "content": turn["prompt"]} + messages.append(user_side) + system_side = {"role": "assistant", "content": turn["output"]} + messages.append(system_side) + return messages + + +def get_gpt4_output(system_prompt, user_prompt, history): + openai.api_type = os.getenv("LLM_INTERACTIONS_OPENAI_API_TYPE") + openai.api_base = os.getenv("LLM_INTERACTIONS_OPENAI_API_BASE") + openai.api_version = os.getenv("LLM_INTERACTIONS_OPENAI_API_VERSION") + openai.api_key = os.getenv("LLM_INTERACTIONS_OPENAI_API_KEY") + engine = "prompt-chat-gpt4" + + history = process_history(history) + messages = [{"role": "system", "content": system_prompt}] + messages.extend(history) + messages.append({"role": "user", "content": user_prompt}) + + response = openai.ChatCompletion.create( + engine=engine, + messages=messages, + temperature=0.7, + max_tokens=700, + top_p=0.95, + frequency_penalty=0, + presence_penalty=0, + stop=None, + ) + + return response["choices"][0]["message"]["content"].strip() + + +def get_gpt3_output(system_prompt, user_prompt, history): + openai.api_type = os.getenv("LLM_INTERACTIONS_OPENAI_API_TYPE") + openai.api_base = os.getenv("LLM_INTERACTIONS_OPENAI_API_BASE") + openai.api_version = os.getenv("LLM_INTERACTIONS_OPENAI_API_VERSION") + openai.api_key = os.getenv("LLM_INTERACTIONS_OPENAI_API_KEY") + engine = "prompt-chat-gpt35" + + history = process_history(history) + messages = [{"role": "system", "content": system_prompt}] + messages.extend(history) + messages.append({"role": "user", "content": user_prompt}) + + response = openai.ChatCompletion.create( + engine=engine, + messages=messages, + temperature=0.7, + max_tokens=700, + top_p=0.95, + frequency_penalty=0, + presence_penalty=0, + stop=None, + ) + + return response["choices"][0]["message"]["content"].strip() + + +def get_llama2_output(system_prompt, conv_history, user_prompt): + api_base = os.getenv("LLM_INTERACTION_LLAMA2_API_BASE") + token = os.getenv("LLM_INTERACTION_LLAMA2_API_TOKEN") + url = f"{api_base}/chat/completions" + + history = process_history(conv_history) + messages = [{"role": "system", "content": system_prompt}] + messages.extend(history) + messages.append({"role": "user", "content": user_prompt}) + + body = { + "model": "meta-llama/Llama-2-70b-chat-hf", + "messages": messages, + "temperature": 0.2, + "max_new_tokens": 500, + "top_p": 1, + } + s = requests.Session() + result = s.post(url, headers={"Authorization": f"Bearer {token}"}, json=body) + return result.json()["choices"][0]["message"]["content"].strip() + + +def get_model_output(system_prompt, user_prompt, history, model="gpt3.5"): + # Assume that translation happens outside (and the prompt is already translated) + out = "" + if model == "gpt3.5": + out = get_gpt3_output(system_prompt, user_prompt, history) + elif model == "gpt4": + out = get_gpt4_output(system_prompt, user_prompt, history) + elif model == "llama2": + out = get_llama2_output(system_prompt, history, user_prompt) + return out diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index b4c2da68c..0e907bafe 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -899,7 +899,7 
@@ def user_analytics(self, request, pk=None): workspace_reviewer_list.extend(reviewer_ids) workspace_reviewer_list = list(set(workspace_reviewer_list)) - if user_id not in workspace_superchecker_list: + if user_id not in workspace_reviewer_list: final_response = { "message": "You do not have enough permissions to access this view!" }
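
Usage note: the chat history that get_llm_output stores in Annotation.result is a list of {"prompt", "output"} turns. Below is a minimal sketch of how the new helpers consume that shape, assuming the tuple-returning duplicate_check above; the sample turns are illustrative only:

    from utils.llm_checks import duplicate_check
    from utils.llm_interactions import process_history

    # One dict per completed chat turn, as appended by get_llm_output.
    history = [
        {"prompt": "Name three Indic scripts.", "output": "Devanagari, Bengali, Tamil."},
    ]

    # process_history flattens the turns into OpenAI-style chat messages.
    assert process_history(history) == [
        {"role": "user", "content": "Name three Indic scripts."},
        {"role": "assistant", "content": "Devanagari, Bengali, Tamil."},
    ]

    # duplicate_check rejects a prompt that repeats an earlier turn verbatim.
    ok, message = duplicate_check(history, "Name three Indic scripts.")
    assert ok is False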
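The new router registrations in dataset/urls.py expose plain ModelViewSet CRUD for both models. A hypothetical request against a local deployment (host, URL prefix, token, and field values are placeholders; required fields follow the Instruction model above):

    import requests

    BASE = "http://localhost:8000"  # placeholder host
    HEADERS = {"Authorization": "Token <api-token>"}  # placeholder credentials

    # POST to the list route creates an Instruction data item.
    resp = requests.post(
        f"{BASE}/data/instructions/",  # prefix depends on how dataset.urls is mounted
        headers=HEADERS,
        json={
            "instance_id": 1,  # hypothetical DatasetInstance of type Instruction
            "meta_info_language": "1",  # "English(Any script)" per migration 0047
            "instruction_data": "Write a haiku about monsoon rain.",
            "examples": "Example: a haiku about winter fog.",
            "hint": "Keep the 5-7-5 syllable structure.",
        },
    )
    print(resp.status_code, resp.json())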