Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

evaluate and training new features #794

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
9 changes: 9 additions & 0 deletions bothub/api/v2/evaluate/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ class Meta:
help_text=_("Filter for repository cross_validation results."),
)

type = filters.CharFilter(
field_name="type",
Copy link

@elitonzky elitonzky May 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line contains trailing whitespace:
field_name="type",
for more details: https://www.flake8rules.com/rules/W291.html

method="filter_evaluate_type",
help_text=_("Filter by evaluate type")
)

def filter_repository_uuid(self, queryset, name, value):
request = self.request
try:
Expand Down Expand Up @@ -139,6 +145,9 @@ def filter_repository_version(self, queryset, name, value):

def filter_repository_cross_validation(self, queryset, name, value):
    """Keep only the rows of ``queryset`` whose ``cross_validation`` equals ``value``.

    ``name`` is accepted for the filter-method signature but is not used.
    """
    lookup = {"cross_validation": value}
    return queryset.filter(**lookup)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This blank line contains whitespace:

for more details: https://www.flake8rules.com/rules/W293.html

def filter_evaluate_type(self, queryset, name, value):
    """Keep only the rows of ``queryset`` whose ``evaluate_type`` equals ``value``.

    ``name`` is accepted for the filter-method signature but is not used.
    """
    lookup = {"evaluate_type": value}
    return queryset.filter(**lookup)


class EvaluateResultFilter(filters.FilterSet):
Expand Down
50 changes: 49 additions & 1 deletion bothub/api/v2/evaluate/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,18 @@ def update(self, instance, validated_data):
class RepositoryEvaluateResultVersionsSerializer(serializers.ModelSerializer):
class Meta:
model = RepositoryEvaluateResult
fields = ["id", "language", "created_at", "version", "cross_validation"]
fields = ["id", "language", "created_at", "version", "cross_validation", "accuracy"]
ref_name = None

language = serializers.SerializerMethodField()
accuracy = serializers.SerializerMethodField()

def get_language(self, obj):
    """Return the ``language`` of the version-language that ``obj`` points to."""
    version_language = obj.repository_version_language
    return version_language.language

def get_accuracy(self, obj):
    """Return the ``accuracy`` stored on the intent-level results of ``obj``."""
    intent_score = obj.intent_results
    return intent_score.accuracy


class RepositoryEvaluateResultScore(serializers.ModelSerializer):
class Meta:
Expand Down Expand Up @@ -192,6 +196,11 @@ class Meta:
"intent_results",
"entity_results",
"cross_validation",
"accuracy",
"evaluate_type",
"qualitity",
"recommendations",
"count_logs"
]
ref_name = None

Expand All @@ -201,6 +210,11 @@ class Meta:
repository_version = serializers.SerializerMethodField()
intent_results = RepositoryEvaluateResultScore(read_only=True)
entity_results = RepositoryEvaluateResultScore(read_only=True)
accuracy = serializers.SerializerMethodField()
evaluate_type = serializers.IntegerField(required=False, help_text="type from evaluate")
qualitity = serializers.SerializerMethodField()
recommendations = serializers.SerializerMethodField()
count_logs = serializers.SerializerMethodField()

def get_intents_list(self, obj):
return RepositoryEvaluateResultIntentSerializer(
Expand Down Expand Up @@ -297,3 +311,37 @@ def filter_intent(log, intent, min_confidence, max_confidence):
}

return {"total_pages": 0, "current_page": 1, "results": []}

def get_accuracy(self, obj):
    """Return the ``accuracy`` stored on the intent-level results of ``obj``."""
    intent_score = obj.intent_results
    return intent_score.accuracy

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This blank line contains whitespace:

for more details: https://www.flake8rules.com/rules/W293.html

def get_qualitity(self, obj):
    """Return the percentage of log entries whose intent prediction succeeded.

    ``obj.log`` is parsed as a JSON array of per-phrase entries. An entry
    counts as a hit when its ``"intent_status"`` value is ``"success"``.

    Returns:
        ``hits * 100 / total`` as a float, or ``0`` when the log is blank or
        holds no entries (previously this raised ``ZeroDivisionError`` or a
        JSON decode error).
    """
    # A blank log is treated the same as an empty list of entries.
    entries = json.loads(obj.log) if obj.log else []
    if not entries:
        return 0
    hits = sum(1 for entry in entries if entry.get("intent_status") == "success")
    return (hits * 100) / len(entries)

def get_recommendations(self, obj):
    """List the intents that appear less often than average in the log.

    ``obj.log`` is parsed as a JSON array of per-phrase entries, which are
    grouped by their ``"intent"`` value. An intent is recommended when its
    number of entries is strictly below the mean number of entries per
    distinct intent.

    Returns:
        ``{"add_phares_to": [...]}`` with the recommended intents in order of
        first appearance. The list is empty when the log is blank or holds no
        entries (previously this raised ``ZeroDivisionError`` or a JSON
        decode error).
    """
    # A blank log is treated the same as an empty list of entries.
    entries = json.loads(obj.log) if obj.log else []

    occurrences = {}
    for entry in entries:
        intent = entry.get("intent")
        occurrences[intent] = occurrences.get(intent, 0) + 1

    if not occurrences:
        # The response key spelling is kept as-is: it is part of the API payload.
        return {"add_phares_to": []}

    average = len(entries) / len(occurrences)
    below_average = [
        intent for intent, count in occurrences.items() if count < average
    ]
    return {"add_phares_to": below_average}

def get_count_logs(self, obj):
    """Return the number of entries in the JSON array stored in ``obj.log``."""
    entries = json.loads(obj.log)
    return len(entries)
9 changes: 9 additions & 0 deletions bothub/api/v2/repository/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
RequestRepositoryAuthorization,
RepositoryVersionLanguage,
QAKnowledgeBase,
RepositoryNLPLog,
)
from bothub.utils import classifier_choice
from .validators import (
Expand Down Expand Up @@ -486,6 +487,7 @@ class Meta:
"repository_score",
"repository_version_language",
"repository_type",
"has_training",
]
read_only = [
"uuid",
Expand Down Expand Up @@ -617,6 +619,7 @@ class Meta:
repository_type = serializers.CharField(
style={"show": False}, read_only=True, source="repository.repository_type"
)
has_training = serializers.SerializerMethodField()

def get_authorizations(self, obj):
auths = RepositoryAuthorization.objects.filter(
Expand Down Expand Up @@ -874,6 +877,12 @@ def get_repository_score(self, obj):
def get_repository_version_language(self, obj):
return obj.repositoryversionlanguage_set.all().values("id", "language")

def get_has_training(self, obj):
    """Tell whether at least one ``RepositoryNLPLog`` exists for ``obj.repository``.

    The lookup follows the log's version-language and version up to the
    repository, and only checks for existence.
    """
    return RepositoryNLPLog.objects.filter(
        repository_version_language__repository_version__repository=obj.repository
    ).exists()


class RepositoryTrainInfoSerializer(serializers.ModelSerializer):
class Meta:
Expand Down
107 changes: 93 additions & 14 deletions bothub/api/v2/repository/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
RepositoryVote,
RequestRepositoryAuthorization,
RepositoryVersionLanguage,
RepositoryEvaluate,
RepositoryEvaluateResult,
Organization,
)

Expand Down Expand Up @@ -127,6 +129,8 @@
ConnectRESTClient as ConnectClient,
)

from bothub.utils import levenshtein_distance

User = get_user_model()


Expand Down Expand Up @@ -715,21 +719,55 @@ def evaluate(self, request, **kwargs):
user_authorization = repository.get_user_authorization(request.user)
if not user_authorization.can_write:
raise PermissionDenied()
serializer = EvaluateSerializer(data=request.data) # pragma: no cover
serializer.is_valid(raise_exception=True) # pragma: no cover

try:
request = repository.request_nlp_manual_evaluate( # pragma: no cover
user_authorization, serializer.data
)
except DjangoValidationError as e:
raise APIException(e.message, code=400)
data = request.data
response = []
version_languages = RepositoryVersionLanguage.objects.filter(repository_version__pk=data.get("repository_version"))
Copy link

@elitonzky elitonzky May 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is too long (it exceeds 119 characters); you can wrap it like this:

version_languages = RepositoryVersionLanguage.objects.filter(
    repository_version__pk=data.get("repository_version")
)

for version_language in version_languages:
if not repository.have_at_least_one_test_phrase_registered(version_language.language):
continue
if "language" in data:
data["language"] = version_language.language
else:
data.update({"language": version_language.language})
serializer = EvaluateSerializer(data=data) # pragma: no cover
serializer.is_valid(raise_exception=True) # pragma: no cover

if request.status_code != status.HTTP_200_OK: # pragma: no cover
raise APIException(
{"status_code": request.status_code}, code=request.status_code
) # pragma: no cover
return Response(request.json()) # pragma: no cover
try:
nlp_request = repository.request_nlp_manual_evaluate( # pragma: no cover
user_authorization, serializer.data
)
except DjangoValidationError as e:
raise APIException(e.message, code=400)

if nlp_request.status_code != status.HTTP_200_OK: # pragma: no cover
raise APIException(
{"status_code": nlp_request.status_code}, code=nlp_request.status_code
) # pragma: no cover

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This blank line contains whitespace:

for more details: https://www.flake8rules.com/rules/W293.html

nlp_response = nlp_request.json()

evaluate_id = nlp_response.get("evaluate_id")
evaluate_result = RepositoryEvaluateResult.objects.get(pk=evaluate_id)
if request.data.get("evaluate_type", False):
evaluate_result.evaluate_type = request.data.get("evaluate_type")
evaluate_result.save()
logs = json.loads(evaluate_result.log)
intent_count = 0
intent_success = 0

for res in logs:
intent_count += 1
intent_success += 1 if res.get("intent_status") == "success" else 0

result_data = {
"accuracy": evaluate_result.intent_results.accuracy,
"intents_count": intent_count,
"intents_success": intent_success,
"evalute_type": evaluate_result.evaluate_type,
}
nlp_response.update(result_data)
response.append(nlp_response)
return Response(response) # pragma: no cover

@action(
detail=True,
Expand Down Expand Up @@ -789,6 +827,47 @@ def check_can_automatic_evaluate(self, request, **kwargs):
return Response(response) # pragma: no cover


@action(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are two blank lines before this method; by convention we use one blank line between methods and two between classes.

detail=True,
methods=["GET"],
url_name="get-recommendations-repository",
)
def get_recommendations_repository(self, request, **kwargs):
repository = self.get_object()
user_authorization = repository.get_user_authorization(request.user)
if not user_authorization.can_write:
raise PermissionDenied()

examples = RepositoryExample.objects.filter(repository_version_language__repository_version__repository=repository)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is too long (it exceeds 119 characters); you can wrap it like this:

        examples = RepositoryExample.objects.filter(
            repository_version_language__repository_version__repository=repository
        )

intents = {}
sum_intents = 0
qnt_intents = 0
sum_distance = 0

for example in examples:
if example.intent.text not in intents:
intents[example.intent.text] = {"text": [], "count": 0, "distance": 0}
intents[example.intent.text]["text"].append(example.text)
intents[example.intent.text]["count"] += 1
sum_intents += 1
qnt_intents += 1
response = {"add_phares_to": [], "more_diversity": []}
avg_intents = (sum_intents/qnt_intents)
for intent in intents:
for i in range(0, intents[intent]['count']):
for j in range(i, intents[intent]['count']):
intents[intent]['distance'] += levenshtein_distance(intents[intent]['text'][i], intents[intent]['text'][j])

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is too long (it exceeds 119 characters); you can wrap it like this:

                for j in range(i, intents[intent]["count"]):
                    intents[intent]["distance"] += levenshtein_distance(
                        intents[intent]["text"][i], intents[intent]["text"][j]
                    )

sum_distance += intents[intent]['distance']

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This blank line contains whitespace:

for more details: https://www.flake8rules.com/rules/W293.html

avg_distance = sum_distance / qnt_intents
for intent in intents:
if intents[intent]['count'] < avg_intents:
response["add_phares_to"].append(intent)
if intents[intent]['distance'] < avg_distance:
response["more_diversity"].append(intent)
return Response(data=response)


@method_decorator(
name="list",
decorator=swagger_auto_schema(
Expand Down
24 changes: 24 additions & 0 deletions bothub/common/migrations/0118_auto_20230517_1711.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 3.2.15 on 2023-05-17 17:11

from django.db import migrations, models
import uuid


class Migration(migrations.Migration):
# Adds the ``evaluate_type`` column to ``repositoryevaluateresult`` and
# alters ``zeroshotoptions.option_uuid``.

dependencies = [
('common', '0117_alter_zeroshotoptions_option_uuid'),
]

operations = [
# New nullable integer field; choices are 0 = manual and 1 = automatic, default 0.
# NOTE(review): the verbose name is 'role', which does not match the field name - confirm.
migrations.AddField(
model_name='repositoryevaluateresult',
name='evaluate_type',
field=models.PositiveIntegerField(blank=True, choices=[(0, 'manual'), (1, 'automatic')], default=0, null=True, verbose_name='role'),
),
# NOTE(review): the default here is one fixed UUID value rather than a callable.
# Presumably the model evaluates uuid.uuid4() at import time, which makes every
# makemigrations run emit a new AlterField like this one - confirm against the model.
migrations.AlterField(
model_name='zeroshotoptions',
name='option_uuid',
field=models.UUIDField(default=uuid.UUID('4e0cf37b-a6b2-40fd-a3cb-cce5bda8e14e')),
),
]
12 changes: 12 additions & 0 deletions bothub/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2381,6 +2381,18 @@ class Meta:

cross_validation = models.BooleanField(_("cross validation"), default=False)

# Integer codes stored in ``evaluate_type``.
TYPE_MANUAL = 0
TYPE_AUTOMATIC = 1

# (value, label) pairs passed as ``choices`` to the ``evaluate_type`` field.
EVALUATE_TYPES_CHOICE = [
(TYPE_MANUAL, "manual"),
(TYPE_AUTOMATIC, "automatic")
]

# Optional field (blank and null allowed) that defaults to TYPE_MANUAL.
# NOTE(review): the verbose name is "role", which does not match the field name - confirm.
evaluate_type = models.PositiveIntegerField(
_("role"), choices=EVALUATE_TYPES_CHOICE, default=TYPE_MANUAL, blank=True, null=True
)

def save(self, *args, **kwargs):
repository = self.repository_version_language.repository_version.repository
self.version = repository.evaluations_results().count() + 1
Expand Down
26 changes: 26 additions & 0 deletions bothub/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,3 +512,29 @@ def check_module_permission(claims, user):
"categories_list",
"repository_type",
]


def levenshtein_distance(str1, str2):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines with whitespace before an opening bracket:
520, 522, 524, 529, 535.
Lines missing whitespace after ",":
529, 535, 536, 537, 538.
You could write it like this:

def levenshtein_distance(str1, str2):
    """Return the Levenshtein (edit) distance between ``str1`` and ``str2``.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn ``str1`` into ``str2``.

    Only the previous and current rows of the dynamic-programming table are
    kept, and the result is a plain ``int`` instead of a NumPy float.
    """
    # previous[y] is the distance between the processed prefix of str1 and
    # str2[:y]; for the empty prefix that is y insertions.
    previous = list(range(len(str2) + 1))
    for x, char1 in enumerate(str1, start=1):
        # Turning str1[:x] into the empty string takes x deletions.
        current = [x]
        for y, char2 in enumerate(str2, start=1):
            current.append(
                min(
                    previous[y] + 1,  # deletion
                    current[y - 1] + 1,  # insertion
                    previous[y - 1] + (char1 != char2),  # match or substitution
                )
            )
        previous = current
    return previous[-1]

size_x = len(str1) + 1
size_y = len(str2) + 1
matrix = np.zeros ((size_x, size_y))
for x in range(size_x):
matrix [x, 0] = x
for y in range(size_y):
matrix [0, y] = y

for x in range(1, size_x):
for y in range(1, size_y):
if str1[x-1] == str2[y-1]:
matrix [x,y] = min(
matrix[x-1, y] + 1,
matrix[x-1, y-1],
matrix[x, y-1] + 1
)
else:
matrix [x,y] = min(
matrix[x-1,y] + 1,
matrix[x-1,y-1] + 1,
matrix[x,y-1] + 1
)
return (matrix[size_x - 1, size_y - 1])