-
-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
evaluate and training new features #794
base: master
Are you sure you want to change the base?
Changes from all commits
6e67569
53371dd
5f6bf1f
87d75d5
a9fa7c0
94aa629
427850c
6358ba8
b20de9c
99d4315
a8c2c4b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -110,6 +110,12 @@ class Meta: | |
help_text=_("Filter for repository cross_validation results."), | ||
) | ||
|
||
type = filters.CharFilter( | ||
field_name="type", | ||
method="filter_evaluate_type", | ||
help_text=_("Filter by evaluate type") | ||
) | ||
|
||
def filter_repository_uuid(self, queryset, name, value): | ||
request = self.request | ||
try: | ||
|
@@ -139,6 +145,9 @@ def filter_repository_version(self, queryset, name, value): | |
|
||
def filter_repository_cross_validation(self, queryset, name, value):
    """Keep only the results whose ``cross_validation`` flag equals ``value``.

    ``name`` is accepted because the filter-method signature passes it, but it
    is not used.
    """
    lookup = {"cross_validation": value}
    return queryset.filter(**lookup)
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line contains whitespace; for more details see https://www.flake8rules.com/rules/W293.html |
||
def filter_evaluate_type(self, queryset, name, value):
    """Keep only the results whose ``evaluate_type`` equals ``value``.

    The value arrives as text from the query string, while ``evaluate_type``
    is stored as an integer. A value that is not a valid integer cannot match
    any row, so an empty queryset is returned instead of letting the ORM raise
    while coercing it.

    ``name`` is accepted because the filter-method signature passes it, but it
    is not used.
    """
    try:
        evaluate_type = int(value)
    except (TypeError, ValueError):
        return queryset.none()
    return queryset.filter(evaluate_type=evaluate_type)
|
||
|
||
class EvaluateResultFilter(filters.FilterSet): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -118,14 +118,18 @@ def update(self, instance, validated_data): | |
class RepositoryEvaluateResultVersionsSerializer(serializers.ModelSerializer):
    """Compact representation of an evaluate result.

    Exposes the model's own ``id``, ``created_at``, ``version`` and
    ``cross_validation`` values, plus two computed values: the language of the
    related version-language and the accuracy of the related intent results.
    """

    class Meta:
        model = RepositoryEvaluateResult
        fields = [
            "id",
            "language",
            "created_at",
            "version",
            "cross_validation",
            "accuracy",
        ]
        ref_name = None

    language = serializers.SerializerMethodField()
    accuracy = serializers.SerializerMethodField()

    def get_language(self, obj):
        """Return the language of the result's version-language."""
        version_language = obj.repository_version_language
        return version_language.language

    def get_accuracy(self, obj):
        """Return the accuracy recorded on the result's intent results."""
        intent_score = obj.intent_results
        return intent_score.accuracy
|
||
|
||
class RepositoryEvaluateResultScore(serializers.ModelSerializer): | ||
class Meta: | ||
|
@@ -192,6 +196,11 @@ class Meta: | |
"intent_results", | ||
"entity_results", | ||
"cross_validation", | ||
"accuracy", | ||
"evaluate_type", | ||
"qualitity", | ||
"recommendations", | ||
"count_logs" | ||
] | ||
ref_name = None | ||
|
||
|
@@ -201,6 +210,11 @@ class Meta: | |
repository_version = serializers.SerializerMethodField() | ||
intent_results = RepositoryEvaluateResultScore(read_only=True) | ||
entity_results = RepositoryEvaluateResultScore(read_only=True) | ||
accuracy = serializers.SerializerMethodField() | ||
evaluate_type = serializers.IntegerField(required=False, help_text="type from evaluate") | ||
qualitity = serializers.SerializerMethodField() | ||
recommendations = serializers.SerializerMethodField() | ||
count_logs = serializers.SerializerMethodField() | ||
|
||
def get_intents_list(self, obj): | ||
return RepositoryEvaluateResultIntentSerializer( | ||
|
@@ -297,3 +311,37 @@ def filter_intent(log, intent, min_confidence, max_confidence): | |
} | ||
|
||
return {"total_pages": 0, "current_page": 1, "results": []} | ||
|
||
def get_accuracy(self, obj):
    """Return the accuracy recorded on the result's intent results."""
    intent_score = obj.intent_results
    return intent_score.accuracy
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line contains whitespace; for more details see https://www.flake8rules.com/rules/W293.html |
||
def get_qualitity(self, obj):
    """Return the percentage (0-100) of log entries with a successful intent.

    ``obj.log`` is parsed as a JSON list of dict entries; an entry counts as a
    hit when its ``"intent_status"`` is ``"success"``.

    A blank log or an empty list yields ``0`` instead of raising
    (``ZeroDivisionError`` for ``[]``, a JSON decode error for ``""``).
    """
    entries = json.loads(obj.log) if obj.log else []
    if not entries:
        return 0
    hits = sum(1 for entry in entries if entry.get("intent_status") == "success")
    return (hits * 100) / len(entries)
|
||
def get_recommendations(self, obj):
    """List the intents that appear less often than average in the log.

    ``obj.log`` is parsed as a JSON list of dict entries. Entries are counted
    per ``"intent"`` value, and every intent whose count is below the mean
    count per distinct intent is reported, in first-seen order.

    The result key ``"add_phares_to"`` is spelled as-is because it is part of
    the serialized payload. A blank or empty log yields an empty list instead
    of raising ``ZeroDivisionError``.
    """
    entries = json.loads(obj.log) if obj.log else []

    occurrences = {}
    for entry in entries:
        intent = entry.get("intent")
        occurrences[intent] = occurrences.get(intent, 0) + 1

    if not occurrences:
        return {"add_phares_to": []}

    average = len(entries) / len(occurrences)
    below_average = [
        intent for intent, count in occurrences.items() if count < average
    ]
    return {"add_phares_to": below_average}
|
||
def get_count_logs(self, obj):
    """Return the number of entries in the result's JSON log.

    A blank log is treated as having no entries, so it yields ``0`` instead of
    failing to parse.
    """
    if not obj.log:
        return 0
    return len(json.loads(obj.log))
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,6 +60,8 @@ | |
RepositoryVote, | ||
RequestRepositoryAuthorization, | ||
RepositoryVersionLanguage, | ||
RepositoryEvaluate, | ||
RepositoryEvaluateResult, | ||
Organization, | ||
) | ||
|
||
|
@@ -127,6 +129,8 @@ | |
ConnectRESTClient as ConnectClient, | ||
) | ||
|
||
from bothub.utils import levenshtein_distance | ||
|
||
User = get_user_model() | ||
|
||
|
||
|
@@ -715,21 +719,55 @@ def evaluate(self, request, **kwargs): | |
user_authorization = repository.get_user_authorization(request.user) | ||
if not user_authorization.can_write: | ||
raise PermissionDenied() | ||
serializer = EvaluateSerializer(data=request.data) # pragma: no cover | ||
serializer.is_valid(raise_exception=True) # pragma: no cover | ||
|
||
try: | ||
request = repository.request_nlp_manual_evaluate( # pragma: no cover | ||
user_authorization, serializer.data | ||
) | ||
except DjangoValidationError as e: | ||
raise APIException(e.message, code=400) | ||
data = request.data | ||
response = [] | ||
version_languages = RepositoryVersionLanguage.objects.filter(repository_version__pk=data.get("repository_version")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this line is long exceeding 119 characters, you can adjust it like this:
|
||
for version_language in version_languages: | ||
if not repository.have_at_least_one_test_phrase_registered(version_language.language): | ||
continue | ||
if "language" in data: | ||
data["language"] = version_language.language | ||
else: | ||
data.update({"language": version_language.language}) | ||
serializer = EvaluateSerializer(data=data) # pragma: no cover | ||
serializer.is_valid(raise_exception=True) # pragma: no cover | ||
|
||
if request.status_code != status.HTTP_200_OK: # pragma: no cover | ||
raise APIException( | ||
{"status_code": request.status_code}, code=request.status_code | ||
) # pragma: no cover | ||
return Response(request.json()) # pragma: no cover | ||
try: | ||
nlp_request = repository.request_nlp_manual_evaluate( # pragma: no cover | ||
user_authorization, serializer.data | ||
) | ||
except DjangoValidationError as e: | ||
raise APIException(e.message, code=400) | ||
|
||
if nlp_request.status_code != status.HTTP_200_OK: # pragma: no cover | ||
raise APIException( | ||
{"status_code": nlp_request.status_code}, code=nlp_request.status_code | ||
) # pragma: no cover | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line contains whitespace; for more details see https://www.flake8rules.com/rules/W293.html |
||
nlp_response = nlp_request.json() | ||
|
||
evaluate_id = nlp_response.get("evaluate_id") | ||
evaluate_result = RepositoryEvaluateResult.objects.get(pk=evaluate_id) | ||
if request.data.get("evaluate_type", False): | ||
evaluate_result.evaluate_type = request.data.get("evaluate_type") | ||
evaluate_result.save() | ||
logs = json.loads(evaluate_result.log) | ||
intent_count = 0 | ||
intent_success = 0 | ||
|
||
for res in logs: | ||
intent_count += 1 | ||
intent_success += 1 if res.get("intent_status") == "success" else 0 | ||
|
||
result_data = { | ||
"accuracy": evaluate_result.intent_results.accuracy, | ||
"intents_count": intent_count, | ||
"intents_success": intent_success, | ||
"evalute_type": evaluate_result.evaluate_type, | ||
} | ||
nlp_response.update(result_data) | ||
response.append(nlp_response) | ||
return Response(response) # pragma: no cover | ||
|
||
@action( | ||
detail=True, | ||
|
@@ -789,6 +827,47 @@ def check_can_automatic_evaluate(self, request, **kwargs): | |
return Response(response) # pragma: no cover | ||
|
||
|
||
@action(
    detail=True,
    methods=["GET"],
    url_name="get-recommendations-repository",
)
def get_recommendations_repository(self, request, **kwargs):
    """Suggest which intents need more, or more varied, training examples.

    Requires write permission on the repository; raises ``PermissionDenied``
    otherwise.

    The repository's examples are grouped by intent text. For each intent the
    Levenshtein distances between every pair of its example texts are summed.
    The response holds two lists of intent names:

    * ``"add_phares_to"``: intents with fewer examples than the mean number
      of examples per intent (key spelled as-is; it is part of the payload).
    * ``"more_diversity"``: intents whose summed pairwise distance is below
      the mean summed distance per intent.

    Both means are taken over the number of distinct intents. A repository
    without examples yields two empty lists instead of dividing by zero.
    """
    repository = self.get_object()
    user_authorization = repository.get_user_authorization(request.user)
    if not user_authorization.can_write:
        raise PermissionDenied()

    examples = RepositoryExample.objects.filter(
        repository_version_language__repository_version__repository=repository
    )

    # Group the example texts by the name of their intent.
    texts_by_intent = {}
    for example in examples:
        texts_by_intent.setdefault(example.intent.text, []).append(example.text)

    response = {"add_phares_to": [], "more_diversity": []}
    if not texts_by_intent:
        return Response(data=response)

    # Sum the edit distance over each unordered pair of texts of an intent.
    # NOTE(review): this total grows with the number of examples, so small
    # intents tend to be flagged for diversity as well - confirm that a
    # mean pairwise distance is not what was intended.
    distance_by_intent = {}
    for intent, texts in texts_by_intent.items():
        distance_by_intent[intent] = sum(
            levenshtein_distance(first, second)
            for index, first in enumerate(texts)
            for second in texts[index + 1:]
        )

    intent_count = len(texts_by_intent)
    total_examples = sum(len(texts) for texts in texts_by_intent.values())
    avg_examples = total_examples / intent_count
    avg_distance = sum(distance_by_intent.values()) / intent_count

    for intent, texts in texts_by_intent.items():
        if len(texts) < avg_examples:
            response["add_phares_to"].append(intent)
        if distance_by_intent[intent] < avg_distance:
            response["more_diversity"].append(intent)
    return Response(data=response)
|
||
|
||
@method_decorator( | ||
name="list", | ||
decorator=swagger_auto_schema( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Generated by Django 3.2.15 on 2023-05-17 17:11 | ||
|
||
from django.db import migrations, models | ||
import uuid | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add ``evaluate_type`` to the evaluate result and re-state ``option_uuid``."""

    dependencies = [
        ("common", "0117_alter_zeroshotoptions_option_uuid"),
    ]

    operations = [
        migrations.AddField(
            model_name="repositoryevaluateresult",
            name="evaluate_type",
            # NOTE(review): verbose_name "role" looks copied from another
            # field - confirm against the model before changing it there.
            field=models.PositiveIntegerField(
                blank=True,
                choices=[(0, "manual"), (1, "automatic")],
                default=0,
                null=True,
                verbose_name="role",
            ),
        ),
        migrations.AlterField(
            model_name="zeroshotoptions",
            name="option_uuid",
            # NOTE(review): a literal UUID as default gives every new row the
            # same value; presumably the model default is evaluated once at
            # import time rather than passed as a callable - verify.
            field=models.UUIDField(
                default=uuid.UUID("4e0cf37b-a6b2-40fd-a3cb-cce5bda8e14e")
            ),
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -512,3 +512,29 @@ def check_module_permission(claims, user): | |
"categories_list", | ||
"repository_type", | ||
] | ||
|
||
|
||
def levenshtein_distance(str1, str2):
    """Return the Levenshtein edit distance between ``str1`` and ``str2``.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one string into the other.

    Only two rows of the dynamic-programming table are kept, so memory use is
    proportional to the shorter string, and no array library is required. The
    result is a plain ``int``.
    """
    if str1 == str2:
        return 0

    # The distance is symmetric; keep the shorter string on the inner axis so
    # the row buffers stay as small as possible.
    if len(str1) < len(str2):
        str1, str2 = str2, str1

    previous_row = list(range(len(str2) + 1))
    for x, char1 in enumerate(str1, start=1):
        current_row = [x]
        for y, char2 in enumerate(str2, start=1):
            substitution_cost = 0 if char1 == char2 else 1
            current_row.append(
                min(
                    previous_row[y] + 1,  # deletion
                    current_row[y - 1] + 1,  # insertion
                    previous_row[y - 1] + substitution_cost,
                )
            )
        previous_row = current_row
    return previous_row[-1]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this line contains trailing whitespace in:
field_name="type",
for more details: https://www.flake8rules.com/rules/W291.html